[llvm] [Coalescer] Consider NewMI's subreg index when updating lanemask. (PR #121780)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 7 04:29:03 PST 2025


https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/121780

>From a2c896bdafdefdea9465810cee8c6d07fdcd80d6 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 23 Dec 2024 16:22:49 +0000
Subject: [PATCH 1/4] [Coalescer] NFCI: Move code added in #116191

By moving the code a bit later, we can factor out some of the conditions as
those are now already tested, which will help when adding another fix
on top that uses `NewMI`'s subreg index (NewIdx).

This change is intended to be NFC.
---
 llvm/lib/CodeGen/RegisterCoalescer.cpp | 41 +++++++++++++-------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 20ad6445344d83..712c64bb9cf6d5 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1374,27 +1374,6 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
   MachineInstr &NewMI = *std::prev(MII);
   NewMI.setDebugLoc(DL);
 
-  // In a situation like the following:
-  //
-  //    undef %2.subreg:reg = INST %1:reg         ; DefMI (rematerializable),
-  //                                              ; DefSubIdx = subreg
-  //    %3:reg = COPY %2                          ; SrcIdx = DstIdx = 0
-  //    .... = SOMEINSTR %3:reg
-  //
-  // there are no subranges for %3 so after rematerialization we need
-  // to explicitly create them. Undefined subranges are removed later on.
-  if (DstReg.isVirtual() && DefSubIdx && !CP.getSrcIdx() && !CP.getDstIdx() &&
-      MRI->shouldTrackSubRegLiveness(DstReg)) {
-    LiveInterval &DstInt = LIS->getInterval(DstReg);
-    if (!DstInt.hasSubRanges()) {
-      LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstReg);
-      LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(DefSubIdx);
-      LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
-      DstInt.createSubRangeFrom(LIS->getVNInfoAllocator(), UsedLanes, DstInt);
-      DstInt.createSubRangeFrom(LIS->getVNInfoAllocator(), UnusedLanes, DstInt);
-    }
-  }
-
   // In a situation like the following:
   //     %0:subreg = instr              ; DefMI, subreg = DstIdx
   //     %1        = copy %0:subreg ; CopyMI, SrcIdx = 0
@@ -1523,6 +1502,26 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
     // sure that "undef" is not set.
     if (NewIdx == 0)
       NewMI.getOperand(0).setIsUndef(false);
+
+    // In a situation like the following:
+    //
+    //    undef %2.subreg:reg = INST %1:reg         ; DefMI (rematerializable),
+    //                                              ; DefSubIdx = subreg
+    //    %3:reg = COPY %2                          ; SrcIdx = DstIdx = 0
+    //    .... = SOMEINSTR %3:reg
+    //
+    // there are no subranges for %3 so after rematerialization we need
+    // to explicitly create them. Undefined subranges are removed later on.
+    if (DefSubIdx && !CP.getSrcIdx() && !CP.getDstIdx() &&
+        MRI->shouldTrackSubRegLiveness(DstReg) && !DstInt.hasSubRanges()) {
+      LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstReg);
+      LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(DefSubIdx);
+      LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
+      VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
+      DstInt.createSubRangeFrom(Alloc, UsedLanes, DstInt);
+      DstInt.createSubRangeFrom(Alloc, UnusedLanes, DstInt);
+    }
+
     // Add dead subregister definitions if we are defining the whole register
     // but only part of it is live.
     // This could happen if the rematerialization instruction is rematerializing

>From 5babad18cb92f708f93c5a0512918ff5e7368fba Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 6 Jan 2025 14:31:49 +0000
Subject: [PATCH 2/4] Pre-commit tests

---
 ...gister-coalesce-update-subranges-remat.mir | 91 ++++++++++++++++++-
 1 file changed, 90 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
index b61fa4be040070..524f51a2da5af2 100644
--- a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
+++ b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
@@ -1,5 +1,5 @@
+# RUN: llc -mtriple=aarch64 -o /dev/null -run-pass=register-coalescer -aarch64-enable-subreg-liveness-tracking -debug-only=regalloc %s 2>&1 | FileCheck %s --check-prefix=CHECK-DBG
 # RUN: llc -mtriple=aarch64 -verify-machineinstrs -o - -run-pass=register-coalescer -aarch64-enable-subreg-liveness-tracking %s | FileCheck %s --check-prefix=CHECK
-# RUN: llc -mtriple=aarch64 -verify-machineinstrs -o /dev/null -run-pass=register-coalescer -aarch64-enable-subreg-liveness-tracking -debug-only=regalloc %s 2>&1 | FileCheck %s --check-prefix=CHECK-DBG
 # REQUIRES: asserts
 
 # CHECK-DBG: ********** REGISTER COALESCER **********
@@ -36,3 +36,92 @@ body:             |
     RET_ReallyLR
 
 ...
+# CHECK-DBG: ********** REGISTER COALESCER **********
+# CHECK-DBG: ********** Function: reproducer
+# CHECK-DBG: ********** JOINING INTERVALS ***********
+# CHECK-DBG: ********** INTERVALS **********
+# CHECK-DBG: %1 [32r,48B:2)[48B,320r:0)[320r,368B:1) 0 at 48B-phi 1 at 320r 2 at 32r
+# CHECK-DBG-SAME: weight:0.000000e+00
+# CHECK-DBG: %3 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at 240r 2 at 80r 3 at 304B-phi
+# CHECK-DBG-SAME: L0000000000000080 [80r,160B:2)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at x 2 at 80r 3 at 304B-phi
+# CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at 240r 2 at 80r 3 at 304B-phi
+# CHECK-DBG-SAME: weight:0.000000e+00
+---
+name:              reproducer
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %0:gpr32 = MOVi32imm 1
+    %1:gpr64 = IMPLICIT_DEF
+
+  bb.1:
+
+  bb.2:
+    %3:gpr64all = SUBREG_TO_REG 0, %0, %subreg.sub_32
+
+  bb.3:
+    $nzcv = IMPLICIT_DEF
+    %4:gpr64 = COPY killed %3
+    Bcc 1, %bb.7, implicit killed $nzcv
+
+  bb.4:
+    $nzcv = IMPLICIT_DEF
+    Bcc 1, %bb.6, implicit killed $nzcv
+
+  bb.5:
+    %5:gpr64all = SUBREG_TO_REG 0, %0, %subreg.sub_32
+    %4:gpr64 = COPY killed %5
+    B %bb.7
+
+  bb.6:
+    %4:gpr64 = COPY $xzr
+
+  bb.7:
+    %7:gpr64 = ADDXrs killed %1, killed %4, 1
+    %1:gpr64 = COPY killed %7
+    B %bb.1
+
+...
+# CHECK-DBG: ********** REGISTER COALESCER **********
+# CHECK-DBG: ********** Function: reproducer2
+# CHECK-DBG: ********** JOINING INTERVALS ***********
+# CHECK-DBG: ********** INTERVALS **********
+# CHECK-DBG: %1 [32r,48B:2)[48B,304r:0)[304r,352B:1) 0 at 48B-phi 1 at 304r 2 at 32r
+# CHECK-DBG-SAME: weight:0.000000e+00
+# CHECK-DBG: %3 [80r,160B:2)[224r,256B:1)[272r,288B:0)[288B,304r:3) 0 at 272r 1 at 224r 2 at 80r 3 at 288B-phi
+# CHECK-DBG-SAME: weight:0.000000e+00
+---
+name:              reproducer2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %0:gpr32 = MOVi32imm 1
+    %1:gpr64 = IMPLICIT_DEF
+
+  bb.1:
+
+  bb.2:
+    %3:gpr64all = SUBREG_TO_REG 0, %0, %subreg.sub_32
+
+  bb.3:
+    $nzcv = IMPLICIT_DEF
+    %4:gpr64 = COPY killed %3
+    Bcc 1, %bb.7, implicit killed $nzcv
+
+  bb.4:
+    $nzcv = IMPLICIT_DEF
+    Bcc 1, %bb.6, implicit killed $nzcv
+
+  bb.5:
+    %4:gpr64 = IMPLICIT_DEF
+    B %bb.7
+
+  bb.6:
+    %4:gpr64 = COPY $xzr
+
+  bb.7:
+    %5:gpr64 = ADDXrs killed %1, killed %4, 1
+    %1:gpr64 = COPY killed %5
+    B %bb.1
+
+...

>From b7afd88e94e1b80d35b4eb7b712976200857ad3a Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 17 Dec 2024 21:01:20 +0000
Subject: [PATCH 3/4] [Coalescer] Consider NewMI's subreg index when updating
 lanemask.

The code added in #116191 that updated the lanemasks for rematerialized
values, checked if DefMI's destination reg had a subreg index.
This seems to have missed the following case:

  %0:gpr32 = MOVi32imm 1
  %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32

which during rematerialization would have the following variables set:

  DefMI = %0:gpr32 = MOVi32imm 1

  NewMI = %3.sub_32:gpr64 = MOVi32imm 1   (rematerialized value)

When checking whether the lanemasks need to be generated, considering
whether DefMI's destination has a subreg index is insufficient, we should
look at DefMI's subreg index instead.

The added tests are a bit more involved, because I was not able to
reconstruct the issue without having some control flow in the test.
These tests come from actual reproducers.
---
 llvm/lib/CodeGen/RegisterCoalescer.cpp                    | 8 ++++----
 .../AArch64/register-coalesce-update-subranges-remat.mir  | 4 +++-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 712c64bb9cf6d5..df7193eae6fbc3 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1508,14 +1508,14 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
     //    undef %2.subreg:reg = INST %1:reg         ; DefMI (rematerializable),
     //                                              ; DefSubIdx = subreg
     //    %3:reg = COPY %2                          ; SrcIdx = DstIdx = 0
-    //    .... = SOMEINSTR %3:reg
+    //    .... = SOMEINSTR %3:reg, %2
     //
     // there are no subranges for %3 so after rematerialization we need
     // to explicitly create them. Undefined subranges are removed later on.
-    if (DefSubIdx && !CP.getSrcIdx() && !CP.getDstIdx() &&
-        MRI->shouldTrackSubRegLiveness(DstReg) && !DstInt.hasSubRanges()) {
+    if (NewIdx && !DstInt.hasSubRanges() &&
+        MRI->shouldTrackSubRegLiveness(DstReg)) {
       LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstReg);
-      LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(DefSubIdx);
+      LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(NewIdx);
       LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
       VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
       DstInt.createSubRangeFrom(Alloc, UsedLanes, DstInt);
diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
index 524f51a2da5af2..08fc47d9480ce9 100644
--- a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
+++ b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
@@ -43,7 +43,7 @@ body:             |
 # CHECK-DBG: %1 [32r,48B:2)[48B,320r:0)[320r,368B:1) 0 at 48B-phi 1 at 320r 2 at 32r
 # CHECK-DBG-SAME: weight:0.000000e+00
 # CHECK-DBG: %3 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at 240r 2 at 80r 3 at 304B-phi
-# CHECK-DBG-SAME: L0000000000000080 [80r,160B:2)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at x 2 at 80r 3 at 304B-phi
+# CHECK-DBG-SAME: L0000000000000080 [288r,304B:0)[304B,320r:3) 0 at 288r 1 at x 2 at x 3 at 304B-phi
 # CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0 at 288r 1 at 240r 2 at 80r 3 at 304B-phi
 # CHECK-DBG-SAME: weight:0.000000e+00
 ---
@@ -89,6 +89,8 @@ body:             |
 # CHECK-DBG: %1 [32r,48B:2)[48B,304r:0)[304r,352B:1) 0 at 48B-phi 1 at 304r 2 at 32r
 # CHECK-DBG-SAME: weight:0.000000e+00
 # CHECK-DBG: %3 [80r,160B:2)[224r,256B:1)[272r,288B:0)[288B,304r:3) 0 at 272r 1 at 224r 2 at 80r 3 at 288B-phi
+# CHECK-DBG-SAME: L0000000000000080 [224r,256B:1)[272r,288B:0)[288B,304r:3) 0 at 272r 1 at 224r 2 at x 3 at 288B-phi
+# CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[224r,256B:1)[272r,288B:0)[288B,304r:3) 0 at 272r 1 at 224r 2 at 80r 3 at 288B-phi
 # CHECK-DBG-SAME: weight:0.000000e+00
 ---
 name:              reproducer2

>From f9e3ada4c4a3cf3221214b9c4e4db2f2fc26633e Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 7 Jan 2025 12:27:09 +0000
Subject: [PATCH 4/4] Update comment

---
 llvm/lib/CodeGen/RegisterCoalescer.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index df7193eae6fbc3..c337be05a66bbf 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1506,9 +1506,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
     // In a situation like the following:
     //
     //    undef %2.subreg:reg = INST %1:reg         ; DefMI (rematerializable),
-    //                                              ; DefSubIdx = subreg
-    //    %3:reg = COPY %2                          ; SrcIdx = DstIdx = 0
-    //    .... = SOMEINSTR %3:reg, %2
+    //                                              ; Defines only some of lanes,
+    //                                              ; so DefSubIdx = NewIdx = subreg
+    //    %3:reg = COPY %2                          ; Copy full reg
+    //    .... = SOMEINSTR %3:reg                   ; Use full reg
     //
     // there are no subranges for %3 so after rematerialization we need
     // to explicitly create them. Undefined subranges are removed later on.



More information about the llvm-commits mailing list