[llvm-branch-commits] [llvm] Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" (PR #134408)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Apr 4 09:08:30 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc

@llvm/pr-subscribers-backend-aarch64

Author: Sander de Smalen (sdesmalen-arm)

<details>
<summary>Changes</summary>

I had to previously revert #<!-- -->123632 due to failures on X86 and it took me a while before I had the time to get back to this. 

This PR tries to reland the original patch, with additional fixes. The PR is structured as follows:
* The `git revert`ed patch (with tests updated)
* A fix to only add the implicit-def when tracking subreg-liveness of the destination register.
* A fix to only add the implicit-def when the destination register is not dead.
* Updated tests after latest rebase.

The PR depends on #<!-- -->131361, which was split off as a separate PR.

---

Patch is 141.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134408.diff


31 Files Affected:

- (modified) llvm/lib/CodeGen/RegisterCoalescer.cpp (+70-16) 
- (modified) llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll (+2-2) 
- (modified) llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll (+5-5) 
- (added) llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll (+51) 
- (added) llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir (+30) 
- (modified) llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir (+55-3) 
- (modified) llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll (+3-4) 
- (modified) llvm/test/CodeGen/AMDGPU/fptosi.f16.ll (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/fptoui.f16.ll (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll (+11-12) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll (+11-12) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i16.ll (+6-10) 
- (modified) llvm/test/CodeGen/AMDGPU/select.f16.ll (+53-55) 
- (modified) llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll (+6-6) 
- (modified) llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll (+4) 
- (modified) llvm/test/CodeGen/PowerPC/build-vector-tests.ll (+48) 
- (modified) llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll (+6) 
- (modified) llvm/test/CodeGen/PowerPC/combine-fneg.ll (+1) 
- (modified) llvm/test/CodeGen/PowerPC/fp-strict-round.ll (+6) 
- (modified) llvm/test/CodeGen/PowerPC/frem.ll (+3) 
- (modified) llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll (+1) 
- (modified) llvm/test/CodeGen/PowerPC/ldexp.ll (+2) 
- (modified) llvm/test/CodeGen/PowerPC/llvm.modf.ll (+1) 
- (modified) llvm/test/CodeGen/PowerPC/vec_insert_elt.ll (+4) 
- (modified) llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll (+176) 
- (added) llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll (+185) 
- (added) llvm/test/CodeGen/X86/coalescer-subreg-to-reg-implicit-def-regression.mir (+62) 
- (added) llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir (+47) 
- (added) llvm/test/CodeGen/X86/pr76416.ll (+79) 
- (modified) llvm/test/CodeGen/X86/subreg-fail.mir (+2-2) 
- (added) llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir (+372) 


``````````diff
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index dbd354f2ca2c4..963f5620d8dba 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -306,7 +306,11 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
   /// number if it is not zero. If DstReg is a physical register and the
   /// existing subregister number of the def / use being updated is not zero,
   /// make sure to set it to the correct physical subregister.
-  void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
+  ///
+  /// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG
+  /// SrcReg. This introduces an implicit-def of DstReg on coalesced users.
+  void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
+                         bool IsSubregToReg);
 
   /// If the given machine operand reads only undefined lanes add an undef
   /// flag.
@@ -1444,6 +1448,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
 
   // CopyMI may have implicit operands, save them so that we can transfer them
   // over to the newly materialized instruction after CopyMI is removed.
+  LaneBitmask NewMIImplicitOpsMask;
   SmallVector<MachineOperand, 4> ImplicitOps;
   ImplicitOps.reserve(CopyMI->getNumOperands() -
                       CopyMI->getDesc().getNumOperands());
@@ -1458,6 +1463,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
               (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
              "unexpected implicit virtual register def");
       ImplicitOps.push_back(MO);
+      if (MO.isDef() && MO.getReg().isVirtual() &&
+          MRI->shouldTrackSubRegLiveness(DstReg))
+        NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
     }
   }
 
@@ -1500,14 +1508,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
       } else {
         assert(MO.getReg() == NewMI.getOperand(0).getReg());
 
-        // We're only expecting another def of the main output, so the range
-        // should get updated with the regular output range.
-        //
-        // FIXME: The range updating below probably needs updating to look at
-        // the super register if subranges are tracked.
-        assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
-               "subrange update for implicit-def of super register may not be "
-               "properly handled");
+        // If lanemasks need to be tracked, compile the lanemask of the NewMI
+        // implicit def operands to avoid subranges for the super-regs from
+        // being removed by code later on in this function.
+        if (MRI->shouldTrackSubRegLiveness(MO.getReg()))
+          NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
       }
     }
   }
@@ -1531,7 +1536,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
     MRI->setRegClass(DstReg, NewRC);
 
     // Update machine operands and add flags.
-    updateRegDefsUses(DstReg, DstReg, DstIdx);
+    updateRegDefsUses(DstReg, DstReg, DstIdx, false);
     NewMI.getOperand(0).setSubReg(NewIdx);
     // updateRegDefUses can add an "undef" flag to the definition, since
     // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
@@ -1607,7 +1612,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
           CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
       VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
       for (LiveInterval::SubRange &SR : DstInt.subranges()) {
-        if ((SR.LaneMask & DstMask).none()) {
+        if ((SR.LaneMask & DstMask).none() &&
+            (SR.LaneMask & NewMIImplicitOpsMask).none()) {
           LLVM_DEBUG(dbgs()
                      << "Removing undefined SubRange "
                      << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
@@ -1872,7 +1878,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
 }
 
 void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
-                                          unsigned SubIdx) {
+                                          unsigned SubIdx, bool IsSubregToReg) {
   bool DstIsPhys = DstReg.isPhysical();
   LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
 
@@ -1892,6 +1898,14 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
     }
   }
 
+  // If DstInt already has a subrange for the unused lanes, then we shouldn't
+  // create duplicate subranges when we update the interval for unused lanes.
+  LaneBitmask DefinedLanes;
+  if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+    for (LiveInterval::SubRange &SR : DstInt->subranges())
+      DefinedLanes |= SR.LaneMask;
+  }
+
   SmallPtrSet<MachineInstr *, 8> Visited;
   for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
                                                E = MRI->reg_instr_end();
@@ -1915,6 +1929,9 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
     if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
       Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
 
+    bool FullDef = true;
+    bool DeadDef = false;
+
     // Replace SrcReg with DstReg in all UseMI operands.
     for (unsigned Op : Ops) {
       MachineOperand &MO = UseMI->getOperand(Op);
@@ -1922,8 +1939,11 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
       // Adjust <undef> flags in case of sub-register joins. We don't want to
       // turn a full def into a read-modify-write sub-register def and vice
       // versa.
-      if (SubIdx && MO.isDef())
+      if (SubIdx && MO.isDef()) {
         MO.setIsUndef(!Reads);
+        FullDef = false;
+        DeadDef = MO.isDead();
+      }
 
       // A subreg use of a partially undef (super) register may be a complete
       // undef use now and then has to be marked that way.
@@ -1956,6 +1976,35 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
         MO.substVirtReg(DstReg, SubIdx, *TRI);
     }
 
+    if (IsSubregToReg && !FullDef && !DeadDef) {
+      // If the coalesed instruction doesn't fully define the register, we need
+      // to preserve the original super register liveness for SUBREG_TO_REG.
+      //
+      // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
+      // but it introduces liveness for other subregisters. Downstream users may
+      // have been relying on those bits, so we need to ensure their liveness is
+      // captured with a def of other lanes.
+      //
+      // The implicit-def only needs adding if we track subregister liveness
+      // for this register, otherwise there is no point.
+
+      if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+        assert(DstInt->hasSubRanges() &&
+               "SUBREG_TO_REG should have resulted in subrange");
+        LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
+        LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+        LaneBitmask UnusedLanes = DstMask & ~UsedLanes & ~DefinedLanes;
+        if ((UnusedLanes).any()) {
+          BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+          DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
+          DefinedLanes |= UnusedLanes;
+        }
+
+        MachineInstrBuilder MIB(*MF, UseMI);
+        MIB.addReg(DstReg, RegState::ImplicitDefine);
+      }
+    }
+
     LLVM_DEBUG({
       dbgs() << "\t\tupdated: ";
       if (!UseMI->isDebugInstr())
@@ -2157,6 +2206,8 @@ bool RegisterCoalescer::joinCopy(
     });
   }
 
+  const bool IsSubregToReg = CopyMI->isSubregToReg();
+
   ShrinkMask = LaneBitmask::getNone();
   ShrinkMainRange = false;
 
@@ -2226,9 +2277,12 @@ bool RegisterCoalescer::joinCopy(
 
   // Rewrite all SrcReg operands to DstReg.
   // Also update DstReg operands to include DstIdx if it is set.
-  if (CP.getDstIdx())
-    updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
-  updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+  if (CP.getDstIdx()) {
+    assert(!IsSubregToReg && "can this happen?");
+    updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false);
+  }
+  updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
+                    IsSubregToReg);
 
   // Shrink subregister ranges if necessary.
   if (ShrinkMask.any()) {
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
index 0f208f8ed9052..374def5d3cdb6 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
+++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios < %s | FileCheck %s
+; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=false < %s | sed -e "/; kill: /d" | FileCheck %s
+; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=true  < %s | FileCheck %s
 
 ; Check there's no assert in spilling from implicit-def operands on an
 ; IMPLICIT_DEF.
@@ -92,7 +93,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
 ; CHECK-NEXT:    ldr x8, [sp, #40] ; 8-byte Folded Reload
 ; CHECK-NEXT:    mov x0, xzr
 ; CHECK-NEXT:    mov x1, xzr
-; CHECK-NEXT:    ; kill: def $w8 killed $w8 killed $x8 def $x8
 ; CHECK-NEXT:    str x8, [sp]
 ; CHECK-NEXT:    bl _fprintf
 ; CHECK-NEXT:    brk #0x1
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
index 2a77d4dd33fe5..4206c0bc26991 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
@@ -27,11 +27,12 @@ define i32 @caller() nounwind ssp {
 ; CHECK-NEXT:    sub sp, sp, #208
 ; CHECK-NEXT:    mov w8, #10 ; =0xa
 ; CHECK-NEXT:    mov w9, #9 ; =0x9
-; CHECK-NEXT:    mov w10, #8 ; =0x8
+; CHECK-NEXT:    mov w0, #1 ; =0x1
 ; CHECK-NEXT:    stp x9, x8, [sp, #24]
-; CHECK-NEXT:    mov w8, #7 ; =0x7
+; CHECK-NEXT:    mov w8, #8 ; =0x8
 ; CHECK-NEXT:    mov w9, #6 ; =0x6
-; CHECK-NEXT:    mov w0, #1 ; =0x1
+; CHECK-NEXT:    str x8, [sp, #16]
+; CHECK-NEXT:    mov w8, #7 ; =0x7
 ; CHECK-NEXT:    mov w1, #2 ; =0x2
 ; CHECK-NEXT:    mov w2, #3 ; =0x3
 ; CHECK-NEXT:    mov w3, #4 ; =0x4
@@ -46,8 +47,7 @@ define i32 @caller() nounwind ssp {
 ; CHECK-NEXT:    stp x22, x21, [sp, #160] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #176] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x29, x30, [sp, #192] ; 16-byte Folded Spill
-; CHECK-NEXT:    stp x8, x10, [sp, #8]
-; CHECK-NEXT:    str x9, [sp]
+; CHECK-NEXT:    stp x9, x8, [sp]
 ; CHECK-NEXT:    bl _callee
 ; CHECK-NEXT:    ldp x29, x30, [sp, #192] ; 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x20, x19, [sp, #176] ; 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll b/llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll
new file mode 100644
index 0000000000000..942b408b5f39c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -enable-subreg-liveness=false  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness=true < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @_ZN4llvm5APInt6divideEPKmjS2_jPmS3_(i32 %lhsWords, i32 %rhsWords) {
+; CHECK-LABEL: _ZN4llvm5APInt6divideEPKmjS2_jPmS3_:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w9, w0, #1
+; CHECK-NEXT:    mov w10, #1 // =0x1
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    sub w9, w9, w1, lsl #1
+; CHECK-NEXT:    bfi w0, w8, #1, #31
+; CHECK-NEXT:    lsr w9, w9, #1
+; CHECK-NEXT:    bfi w10, w9, #2, #30
+; CHECK-NEXT:    cmp w10, #0
+; CHECK-NEXT:    b.hs .LBB0_2
+; CHECK-NEXT:  // %bb.1: // %if.then15
+; CHECK-NEXT:    lsl x8, x0, #2
+; CHECK-NEXT:    ldr xzr, [x8]
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    b _Znam
+  %mul = shl i32 %rhsWords, 1
+  %mul1 = shl i32 %lhsWords, 1
+  %sub = sub i32 %mul1, %mul
+  %add7 = or i32 %mul1, 1
+  %idxprom = zext i32 %add7 to i64
+  %mul3 = shl i32 %sub, 1
+  %add4 = or i32 %mul3, 1
+  %1 = icmp ult i32 %add4, 0
+  br i1 %1, label %if.then15, label %3
+
+common.ret:                                       ; preds = %3, %if.then15
+  ret void
+
+if.then15:                                        ; preds = %0
+  %idxprom12 = zext i32 %add7 to i64
+  %arrayidx13 = getelementptr [128 x i32], ptr null, i64 0, i64 %idxprom12
+  %2 = load volatile ptr, ptr %arrayidx13, align 8
+  br label %common.ret
+
+3:                                                ; preds = %0
+  %call = tail call ptr @_Znam(i64 %idxprom)
+  br label %common.ret
+}
+
+declare ptr @_Znam(i64)
diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir
new file mode 100644
index 0000000000000..678d76527fa81
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir
@@ -0,0 +1,30 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=false -o - %s | FileCheck %s --check-prefix=SRLT
+# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=true -o - %s | FileCheck %s --check-prefix=NOSRLT
+---
+name: test
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x1
+    ; SRLT-LABEL: name: test
+    ; SRLT: liveins: $x1
+    ; SRLT-NEXT: {{  $}}
+    ; SRLT-NEXT: renamable $x0 = COPY $x1
+    ; SRLT-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def $x1
+    ; SRLT-NEXT: RET_ReallyLR implicit $x1, implicit $x0
+    ;
+    ; NOSRLT-LABEL: name: test
+    ; NOSRLT: liveins: $x1
+    ; NOSRLT-NEXT: {{  $}}
+    ; NOSRLT-NEXT: renamable $x0 = COPY $x1
+    ; NOSRLT-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def renamable $x1
+    ; NOSRLT-NEXT: RET_ReallyLR implicit $x1, implicit $x0
+    %190:gpr64 = COPY killed $x1
+    %191:gpr32 = COPY %190.sub_32:gpr64
+    %192:gpr32 = ORRWrr $wzr, killed %191:gpr32
+    %193:gpr64all = SUBREG_TO_REG 0, killed %192:gpr32, %subreg.sub_32
+    $x0 = COPY killed %190:gpr64
+    $x1 = COPY killed %193:gpr64all
+    RET_ReallyLR implicit $x1, implicit $x0
+...
diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
index 08fc47d9480ce..abf739fb9095e 100644
--- a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
+++ b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
@@ -7,8 +7,8 @@
 # CHECK-DBG: ********** JOINING INTERVALS ***********
 # CHECK-DBG: ********** INTERVALS **********
 # CHECK-DBG: %0 [16r,32r:0) 0 at 16r  weight:0.000000e+00
-# CHECK-DBG: %3 [48r,112r:0) 0 at 48r  L0000000000000040 [48r,112r:0) 0 at 48r  weight:0.000000e+00
-# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r  L0000000000000080 [112e,112d:0) 0 at 112e  L0000000000000040 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r  weight:0.000000e+00
+# CHECK-DBG: %3 [48r,112r:0) 0 at 48r  L0000000000000080 [48r,112r:0) 0 at 48r  L0000000000000040 [48r,112r:0) 0 at 48r  weight:0.000000e+00
+# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r  L0000000000000080 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r  L0000000000000040 [80r,112e:1)[112e,112d:0) 0 at 112e 1 at 80r  weight:0.000000e+00
 # CHECK-DBG: %5 [32r,112r:1)[112r,112d:0) 0 at 112r 1 at 32r  weight:0.000000e+00
 ---
 name:            test
@@ -43,7 +43,7 @@ body:             |
 # CHECK-DBG: %1 [32r,48B:2)[48B,320r:0)[320r,368B:1) 0 at 48B-phi 1 at 320r 2 at 32r
 # CHECK-DBG-SAME: weight:0.000000e+00
 # CHECK-DBG: %3 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at 240r 2 at 80r 3 at 304B-phi
-# CHECK-DBG-SAME: L0000000000000080 [288r,304B:0)[304B,320r:3) 0 at 288r 1 at x 2 at x 3 at 304B-phi
+# CHECK-DBG-SAME: L0000000000000080 [240r,272B:1)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at 240r 2 at x 3 at 304B-phi
 # CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at 240r 2 at 80r 3 at 304B-phi
 # CHECK-DBG-SAME: weight:0.000000e+00
 ---
@@ -127,3 +127,55 @@ body:             |
     B %bb.1
 
 ...
+# Test that the interval `L0000000000000080 [112r,112d:1)` is not removed,
+# when removing undefined subranges.
+#
+# CHECK-DBG: ********** REGISTER COALESCER **********
+# CHECK-DBG: ********** Function: reproducer3
+# CHECK-DBG: ********** JOINING INTERVALS ***********
+# CHECK-DBG: ********** INTERVALS **********
+# CHECK-DBG: W0 [0B,32r:0)[320r,336r:1) 0 at 0B-phi 1 at 320r
+# CHECK-DBG: W1 [0B,16r:0) 0 at 0B-phi
+# CHECK-DBG: %0 [16r,64r:0) 0 at 16r  weight:0.000000e+00
+# CHECK-DBG: %1 [32r,128r:0) 0 at 32r  weight:0.000000e+00
+# CHECK-DBG: %2 [48r,64r:0) 0 at 48r  weight:0.000000e+00
+# CHECK-DBG: %3 [64r,80r:0) 0 at 64r  weight:0.000000e+00
+# CHECK-DBG: %4 [80r,176r:0) 0 at 80r  weight:0.000000e+00
+# CHECK-DBG: %7 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0 at 128r 1 at 112r
+# CHECK-DBG-SAME: L0000000000000080 [112r,112d:1)[128r,256r:0)[304B,320r:0) 0 at 128r 1 at 112r
+# CHECK-DBG-SAME: L0000000000000040 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0 at 128r 1 at 112r
+# CHECK-DBG-SAME: weight:0.000000e+00
+# CHECK-DBG: %8 [96r,176r:1)[176r,192r:0) 0 at 176r 1 at 96r  weight:0.000000e+00
+# CHECK-DBG: %9 [256r,272r:0) 0 at 256r  weight:0.000000e+00
+---
+name:            reproducer3
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+
+    %0:gpr32 = COPY killed $w1
+    %1:gpr32 = COPY killed $w0
+    %3:gpr32 = UBFMWri %1, 31, 30
+    %4:gpr32 = SUBWrs killed %3, killed %0, 1
+    %5:gpr32 = UBFMWri killed %4, 1, 31
+    %6:gpr32 = MOVi32imm 1
+    %7:gpr32 = COPY %6
+    %7:gpr32 = BFMWri %7, killed %1, 31, 30
+    %8:gpr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32
+    %9:gpr32common = COPY killed %6
+    %9:gpr32common = BFMWri %9, killed %5, 30, 29
+    dead $wzr = SUBSWri killed %9, 0, 0, implicit-def $nzcv
+    Bcc 2, %bb.2, implicit killed $nzcv
+    B %bb.1
+
+  bb.1:
+    %10:gpr64common = UBFMXri killed %8, 62, 61
+    dead $xzr = LDRXui killed %10, 0
+    RET_ReallyLR
+
+  bb.2:
+    $x0 = COPY killed %8
+    RET_ReallyLR implicit killed $x0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
index c739ba2183ef9..86ef27a1522f5 100644
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -329,11 +329,10 @@ define <2 x half> @chain_hi_to_lo_global() {
 ; GFX11-TRUE16:       ; %bb.0: ; %bb
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v0, 2
-; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-TRUE16-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v3, 0
 ; GFX11-TRUE16-NEXT:    global_load_d16_b16 v0, v[0:1], off
-; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
-; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v2, 0
-; GFX11-TRUE16-NEXT:    global_load_d16_hi_b16 v0, v[1:2], off
+; GFX11-TRUE16-NEXT:    global_load_d16_hi_b16 v0, v[2:3], off
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
index f84e14ea62273..d5f983c2f5648 100644
--- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
@@ -328,13 +328,13 @@ define amdgpu_kernel void @fptosi_v2f16_to_v2i16(
 ; GFX11-TRUE16-NEXT:    buffer_load_b32 v0, off, s[8:11], 0
 ; GFX11-TRUE16-NEXT:    s_mov_b32 s5, s1
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-TRUE16-NEXT:    v_l...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/134408


More information about the llvm-branch-commits mailing list