[llvm-branch-commits] [llvm] Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" (PR #134408)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Apr 4 09:08:30 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
@llvm/pr-subscribers-backend-aarch64
Author: Sander de Smalen (sdesmalen-arm)
<details>
<summary>Changes</summary>
I previously had to revert #<!-- -->123632 due to failures on X86, and it took me a while to find the time to get back to this.
This PR tries to reland the original patch, with additional fixes. The PR is structured as follows:
* The `git revert`ed patch (with tests updated)
* A fix to only add the implicit-def when tracking subreg-liveness of the destination register.
* A fix to only add the implicit-def when the destination register is not dead.
* Updated tests after latest rebase.
The PR depends on #<!-- -->131361, which was split off as a separate PR.
---
Patch is 141.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134408.diff
31 Files Affected:
- (modified) llvm/lib/CodeGen/RegisterCoalescer.cpp (+70-16)
- (modified) llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll (+5-5)
- (added) llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll (+51)
- (added) llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir (+30)
- (modified) llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir (+55-3)
- (modified) llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll (+3-4)
- (modified) llvm/test/CodeGen/AMDGPU/fptosi.f16.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/fptoui.f16.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll (+11-12)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll (+11-12)
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i16.ll (+6-10)
- (modified) llvm/test/CodeGen/AMDGPU/select.f16.ll (+53-55)
- (modified) llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll (+6-6)
- (modified) llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll (+4)
- (modified) llvm/test/CodeGen/PowerPC/build-vector-tests.ll (+48)
- (modified) llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll (+6)
- (modified) llvm/test/CodeGen/PowerPC/combine-fneg.ll (+1)
- (modified) llvm/test/CodeGen/PowerPC/fp-strict-round.ll (+6)
- (modified) llvm/test/CodeGen/PowerPC/frem.ll (+3)
- (modified) llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll (+1)
- (modified) llvm/test/CodeGen/PowerPC/ldexp.ll (+2)
- (modified) llvm/test/CodeGen/PowerPC/llvm.modf.ll (+1)
- (modified) llvm/test/CodeGen/PowerPC/vec_insert_elt.ll (+4)
- (modified) llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll (+176)
- (added) llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll (+185)
- (added) llvm/test/CodeGen/X86/coalescer-subreg-to-reg-implicit-def-regression.mir (+62)
- (added) llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir (+47)
- (added) llvm/test/CodeGen/X86/pr76416.ll (+79)
- (modified) llvm/test/CodeGen/X86/subreg-fail.mir (+2-2)
- (added) llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir (+372)
``````````diff
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index dbd354f2ca2c4..963f5620d8dba 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -306,7 +306,11 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
- void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
+ ///
+ /// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG
+ /// SrcReg. This introduces an implicit-def of DstReg on coalesced users.
+ void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
+ bool IsSubregToReg);
/// If the given machine operand reads only undefined lanes add an undef
/// flag.
@@ -1444,6 +1448,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// CopyMI may have implicit operands, save them so that we can transfer them
// over to the newly materialized instruction after CopyMI is removed.
+ LaneBitmask NewMIImplicitOpsMask;
SmallVector<MachineOperand, 4> ImplicitOps;
ImplicitOps.reserve(CopyMI->getNumOperands() -
CopyMI->getDesc().getNumOperands());
@@ -1458,6 +1463,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
"unexpected implicit virtual register def");
ImplicitOps.push_back(MO);
+ if (MO.isDef() && MO.getReg().isVirtual() &&
+ MRI->shouldTrackSubRegLiveness(DstReg))
+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
@@ -1500,14 +1508,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else {
assert(MO.getReg() == NewMI.getOperand(0).getReg());
- // We're only expecting another def of the main output, so the range
- // should get updated with the regular output range.
- //
- // FIXME: The range updating below probably needs updating to look at
- // the super register if subranges are tracked.
- assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
- "subrange update for implicit-def of super register may not be "
- "properly handled");
+ // If lanemasks need to be tracked, compile the lanemask of the NewMI
+ // implicit def operands to avoid subranges for the super-regs from
+ // being removed by code later on in this function.
+ if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
}
@@ -1531,7 +1536,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
MRI->setRegClass(DstReg, NewRC);
// Update machine operands and add flags.
- updateRegDefsUses(DstReg, DstReg, DstIdx);
+ updateRegDefsUses(DstReg, DstReg, DstIdx, false);
NewMI.getOperand(0).setSubReg(NewIdx);
// updateRegDefUses can add an "undef" flag to the definition, since
// it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
@@ -1607,7 +1612,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
- if ((SR.LaneMask & DstMask).none()) {
+ if ((SR.LaneMask & DstMask).none() &&
+ (SR.LaneMask & NewMIImplicitOpsMask).none()) {
LLVM_DEBUG(dbgs()
<< "Removing undefined SubRange "
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
@@ -1872,7 +1878,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
- unsigned SubIdx) {
+ unsigned SubIdx, bool IsSubregToReg) {
bool DstIsPhys = DstReg.isPhysical();
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
@@ -1892,6 +1898,14 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
}
}
+ // If DstInt already has a subrange for the unused lanes, then we shouldn't
+ // create duplicate subranges when we update the interval for unused lanes.
+ LaneBitmask DefinedLanes;
+ if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ for (LiveInterval::SubRange &SR : DstInt->subranges())
+ DefinedLanes |= SR.LaneMask;
+ }
+
SmallPtrSet<MachineInstr *, 8> Visited;
for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
E = MRI->reg_instr_end();
@@ -1915,6 +1929,9 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
+ bool FullDef = true;
+ bool DeadDef = false;
+
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned Op : Ops) {
MachineOperand &MO = UseMI->getOperand(Op);
@@ -1922,8 +1939,11 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
// Adjust <undef> flags in case of sub-register joins. We don't want to
// turn a full def into a read-modify-write sub-register def and vice
// versa.
- if (SubIdx && MO.isDef())
+ if (SubIdx && MO.isDef()) {
MO.setIsUndef(!Reads);
+ FullDef = false;
+ DeadDef = MO.isDead();
+ }
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
@@ -1956,6 +1976,35 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
MO.substVirtReg(DstReg, SubIdx, *TRI);
}
+ if (IsSubregToReg && !FullDef && !DeadDef) {
+ // If the coalesced instruction doesn't fully define the register, we need
+ // to preserve the original super register liveness for SUBREG_TO_REG.
+ //
+ // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
+ // but it introduces liveness for other subregisters. Downstream users may
+ // have been relying on those bits, so we need to ensure their liveness is
+ // captured with a def of other lanes.
+ //
+ // The implicit-def only needs adding if we track subregister liveness
+ // for this register, otherwise there is no point.
+
+ if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ assert(DstInt->hasSubRanges() &&
+ "SUBREG_TO_REG should have resulted in subrange");
+ LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask UnusedLanes = DstMask & ~UsedLanes & ~DefinedLanes;
+ if ((UnusedLanes).any()) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
+ DefinedLanes |= UnusedLanes;
+ }
+
+ MachineInstrBuilder MIB(*MF, UseMI);
+ MIB.addReg(DstReg, RegState::ImplicitDefine);
+ }
+ }
+
LLVM_DEBUG({
dbgs() << "\t\tupdated: ";
if (!UseMI->isDebugInstr())
@@ -2157,6 +2206,8 @@ bool RegisterCoalescer::joinCopy(
});
}
+ const bool IsSubregToReg = CopyMI->isSubregToReg();
+
ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;
@@ -2226,9 +2277,12 @@ bool RegisterCoalescer::joinCopy(
// Rewrite all SrcReg operands to DstReg.
// Also update DstReg operands to include DstIdx if it is set.
- if (CP.getDstIdx())
- updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
- updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+ if (CP.getDstIdx()) {
+ assert(!IsSubregToReg && "can this happen?");
+ updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false);
+ }
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
+ IsSubregToReg);
// Shrink subregister ranges if necessary.
if (ShrinkMask.any()) {
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
index 0f208f8ed9052..374def5d3cdb6 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
+++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios < %s | FileCheck %s
+; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=false < %s | sed -e "/; kill: /d" | FileCheck %s
+; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=true < %s | FileCheck %s
; Check there's no assert in spilling from implicit-def operands on an
; IMPLICIT_DEF.
@@ -92,7 +93,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
; CHECK-NEXT: ldr x8, [sp, #40] ; 8-byte Folded Reload
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: mov x1, xzr
-; CHECK-NEXT: ; kill: def $w8 killed $w8 killed $x8 def $x8
; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: bl _fprintf
; CHECK-NEXT: brk #0x1
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
index 2a77d4dd33fe5..4206c0bc26991 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
@@ -27,11 +27,12 @@ define i32 @caller() nounwind ssp {
; CHECK-NEXT: sub sp, sp, #208
; CHECK-NEXT: mov w8, #10 ; =0xa
; CHECK-NEXT: mov w9, #9 ; =0x9
-; CHECK-NEXT: mov w10, #8 ; =0x8
+; CHECK-NEXT: mov w0, #1 ; =0x1
; CHECK-NEXT: stp x9, x8, [sp, #24]
-; CHECK-NEXT: mov w8, #7 ; =0x7
+; CHECK-NEXT: mov w8, #8 ; =0x8
; CHECK-NEXT: mov w9, #6 ; =0x6
-; CHECK-NEXT: mov w0, #1 ; =0x1
+; CHECK-NEXT: str x8, [sp, #16]
+; CHECK-NEXT: mov w8, #7 ; =0x7
; CHECK-NEXT: mov w1, #2 ; =0x2
; CHECK-NEXT: mov w2, #3 ; =0x3
; CHECK-NEXT: mov w3, #4 ; =0x4
@@ -46,8 +47,7 @@ define i32 @caller() nounwind ssp {
; CHECK-NEXT: stp x22, x21, [sp, #160] ; 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #176] ; 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #192] ; 16-byte Folded Spill
-; CHECK-NEXT: stp x8, x10, [sp, #8]
-; CHECK-NEXT: str x9, [sp]
+; CHECK-NEXT: stp x9, x8, [sp]
; CHECK-NEXT: bl _callee
; CHECK-NEXT: ldp x29, x30, [sp, #192] ; 16-byte Folded Reload
; CHECK-NEXT: ldp x20, x19, [sp, #176] ; 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll b/llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll
new file mode 100644
index 0000000000000..942b408b5f39c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -enable-subreg-liveness=false < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness=true < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @_ZN4llvm5APInt6divideEPKmjS2_jPmS3_(i32 %lhsWords, i32 %rhsWords) {
+; CHECK-LABEL: _ZN4llvm5APInt6divideEPKmjS2_jPmS3_:
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsl w9, w0, #1
+; CHECK-NEXT: mov w10, #1 // =0x1
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: sub w9, w9, w1, lsl #1
+; CHECK-NEXT: bfi w0, w8, #1, #31
+; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: bfi w10, w9, #2, #30
+; CHECK-NEXT: cmp w10, #0
+; CHECK-NEXT: b.hs .LBB0_2
+; CHECK-NEXT: // %bb.1: // %if.then15
+; CHECK-NEXT: lsl x8, x0, #2
+; CHECK-NEXT: ldr xzr, [x8]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: b _Znam
+ %mul = shl i32 %rhsWords, 1
+ %mul1 = shl i32 %lhsWords, 1
+ %sub = sub i32 %mul1, %mul
+ %add7 = or i32 %mul1, 1
+ %idxprom = zext i32 %add7 to i64
+ %mul3 = shl i32 %sub, 1
+ %add4 = or i32 %mul3, 1
+ %1 = icmp ult i32 %add4, 0
+ br i1 %1, label %if.then15, label %3
+
+common.ret: ; preds = %3, %if.then15
+ ret void
+
+if.then15: ; preds = %0
+ %idxprom12 = zext i32 %add7 to i64
+ %arrayidx13 = getelementptr [128 x i32], ptr null, i64 0, i64 %idxprom12
+ %2 = load volatile ptr, ptr %arrayidx13, align 8
+ br label %common.ret
+
+3: ; preds = %0
+ %call = tail call ptr @_Znam(i64 %idxprom)
+ br label %common.ret
+}
+
+declare ptr @_Znam(i64)
diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir
new file mode 100644
index 0000000000000..678d76527fa81
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir
@@ -0,0 +1,30 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=false -o - %s | FileCheck %s --check-prefix=SRLT
+# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=true -o - %s | FileCheck %s --check-prefix=NOSRLT
+---
+name: test
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x1
+ ; SRLT-LABEL: name: test
+ ; SRLT: liveins: $x1
+ ; SRLT-NEXT: {{ $}}
+ ; SRLT-NEXT: renamable $x0 = COPY $x1
+ ; SRLT-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def $x1
+ ; SRLT-NEXT: RET_ReallyLR implicit $x1, implicit $x0
+ ;
+ ; NOSRLT-LABEL: name: test
+ ; NOSRLT: liveins: $x1
+ ; NOSRLT-NEXT: {{ $}}
+ ; NOSRLT-NEXT: renamable $x0 = COPY $x1
+ ; NOSRLT-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def renamable $x1
+ ; NOSRLT-NEXT: RET_ReallyLR implicit $x1, implicit $x0
+ %190:gpr64 = COPY killed $x1
+ %191:gpr32 = COPY %190.sub_32:gpr64
+ %192:gpr32 = ORRWrr $wzr, killed %191:gpr32
+ %193:gpr64all = SUBREG_TO_REG 0, killed %192:gpr32, %subreg.sub_32
+ $x0 = COPY killed %190:gpr64
+ $x1 = COPY killed %193:gpr64all
+ RET_ReallyLR implicit $x1, implicit $x0
+...
diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
index 08fc47d9480ce..abf739fb9095e 100644
--- a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
+++ b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
@@ -7,8 +7,8 @@
# CHECK-DBG: ********** JOINING INTERVALS ***********
# CHECK-DBG: ********** INTERVALS **********
# CHECK-DBG: %0 [16r,32r:0) 0 at 16r weight:0.000000e+00
-# CHECK-DBG: %3 [48r,112r:0) 0 at 48r L0000000000000040 [48r,112r:0) 0 at 48r weight:0.000000e+00
-# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r L0000000000000080 [112e,112d:0) 0 at 112e L0000000000000040 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r weight:0.000000e+00
+# CHECK-DBG: %3 [48r,112r:0) 0 at 48r L0000000000000080 [48r,112r:0) 0 at 48r L0000000000000040 [48r,112r:0) 0 at 48r weight:0.000000e+00
+# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r L0000000000000080 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r L0000000000000040 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r weight:0.000000e+00
# CHECK-DBG: %5 [32r,112r:1)[112r,112d:0) 0 at 112r 1 at 32r weight:0.000000e+00
---
name: test
@@ -43,7 +43,7 @@ body: |
# CHECK-DBG: %1 [32r,48B:2)[48B,320r:0)[320r,368B:1) 0 at 48B-phi 1 at 320r 2 at 32r
# CHECK-DBG-SAME: weight:0.000000e+00
# CHECK-DBG: %3 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at 240r 2 at 80r 3 at 304B-phi
-# CHECK-DBG-SAME: L0000000000000080 [288r,304B:0)[304B,320r:3) 0 at 288r 1 at x 2 at x 3 at 304B-phi
+# CHECK-DBG-SAME: L0000000000000080 [240r,272B:1)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at 240r 2 at x 3 at 304B-phi
# CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at 240r 2 at 80r 3 at 304B-phi
# CHECK-DBG-SAME: weight:0.000000e+00
---
@@ -127,3 +127,55 @@ body: |
B %bb.1
...
+# Test that the interval `L0000000000000080 [112r,112d:1)` is not removed,
+# when removing undefined subranges.
+#
+# CHECK-DBG: ********** REGISTER COALESCER **********
+# CHECK-DBG: ********** Function: reproducer3
+# CHECK-DBG: ********** JOINING INTERVALS ***********
+# CHECK-DBG: ********** INTERVALS **********
+# CHECK-DBG: W0 [0B,32r:0)[320r,336r:1) 0 at 0B-phi 1 at 320r
+# CHECK-DBG: W1 [0B,16r:0) 0 at 0B-phi
+# CHECK-DBG: %0 [16r,64r:0) 0 at 16r weight:0.000000e+00
+# CHECK-DBG: %1 [32r,128r:0) 0 at 32r weight:0.000000e+00
+# CHECK-DBG: %2 [48r,64r:0) 0 at 48r weight:0.000000e+00
+# CHECK-DBG: %3 [64r,80r:0) 0 at 64r weight:0.000000e+00
+# CHECK-DBG: %4 [80r,176r:0) 0 at 80r weight:0.000000e+00
+# CHECK-DBG: %7 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0 at 128r 1 at 112r
+# CHECK-DBG-SAME: L0000000000000080 [112r,112d:1)[128r,256r:0)[304B,320r:0) 0 at 128r 1 at 112r
+# CHECK-DBG-SAME: L0000000000000040 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0 at 128r 1 at 112r
+# CHECK-DBG-SAME: weight:0.000000e+00
+# CHECK-DBG: %8 [96r,176r:1)[176r,192r:0) 0 at 176r 1 at 96r weight:0.000000e+00
+# CHECK-DBG: %9 [256r,272r:0) 0 at 256r weight:0.000000e+00
+---
+name: reproducer3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ %0:gpr32 = COPY killed $w1
+ %1:gpr32 = COPY killed $w0
+ %3:gpr32 = UBFMWri %1, 31, 30
+ %4:gpr32 = SUBWrs killed %3, killed %0, 1
+ %5:gpr32 = UBFMWri killed %4, 1, 31
+ %6:gpr32 = MOVi32imm 1
+ %7:gpr32 = COPY %6
+ %7:gpr32 = BFMWri %7, killed %1, 31, 30
+ %8:gpr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32
+ %9:gpr32common = COPY killed %6
+ %9:gpr32common = BFMWri %9, killed %5, 30, 29
+ dead $wzr = SUBSWri killed %9, 0, 0, implicit-def $nzcv
+ Bcc 2, %bb.2, implicit killed $nzcv
+ B %bb.1
+
+ bb.1:
+ %10:gpr64common = UBFMXri killed %8, 62, 61
+ dead $xzr = LDRXui killed %10, 0
+ RET_ReallyLR
+
+ bb.2:
+ $x0 = COPY killed %8
+ RET_ReallyLR implicit killed $x0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
index c739ba2183ef9..86ef27a1522f5 100644
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -329,11 +329,10 @@ define <2 x half> @chain_hi_to_lo_global() {
; GFX11-TRUE16: ; %bb.0: ; %bb
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 2
-; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, 0
; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off
-; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
-; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0
-; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v[1:2], off
+; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v[2:3], off
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
index f84e14ea62273..d5f983c2f5648 100644
--- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
@@ -328,13 +328,13 @@ define amdgpu_kernel void @fptosi_v2f16_to_v2i16(
; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
; GFX11-TRUE16-NEXT: s_mov_b32 s5, s1
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
-; GFX11-TRUE16-NEXT: v_l...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/134408
More information about the llvm-branch-commits
mailing list