[llvm-branch-commits] [llvm] release/19.x: [RegisterCoalescer] Fix SUBREG_TO_REG handling in the RegisterCoalescer. (#96839) (PR #101071)

Mon Jul 29 12:35:46 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-powerpc

Author: llvmbot

<details>
<summary>Changes</summary>

Backport 26fa399

Requested by: @stefanp-ibm

---
Full diff: https://github.com/llvm/llvm-project/pull/101071.diff


3 Files Affected:

- (modified) llvm/lib/CodeGen/RegisterCoalescer.cpp (+7) 
- (added) llvm/test/CodeGen/PowerPC/subreg-coalescer.mir (+34) 
- (added) llvm/test/CodeGen/X86/subreg-fail.mir (+37) 


``````````diff

diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 1c35a88b4dc4a..043ea20191487 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -3673,6 +3673,13 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
 
     LHSVals.pruneSubRegValues(LHS, ShrinkMask);
     RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+  } else if (TrackSubRegLiveness && !CP.getDstIdx() && CP.getSrcIdx()) {
+    LHS.createSubRangeFrom(LIS->getVNInfoAllocator(),
+                           CP.getNewRC()->getLaneMask(), LHS);
+    mergeSubRangeInto(LHS, RHS, TRI->getSubRegIndexLaneMask(CP.getSrcIdx()), CP,
+                      CP.getDstIdx());
+    LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
+    LHSVals.pruneSubRegValues(LHS, ShrinkMask);
   }
 
   // The merging algorithm in LiveInterval::join() can't handle conflicting
diff --git a/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
new file mode 100644
index 0000000000000..39eab1f562e71
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s \
+# RUN:   -verify-coalescing --run-pass=register-coalescer -o - | FileCheck %s
+
+# Check that the register coalescer correctly handles merging live ranges over
+# SUBREG_TO_REG on PowerPC. The -verify-coalescing option will give an error if
+# this is incorrect.
+
+---
+name: check_subregs
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x3
+
+    ; CHECK-LABEL: name: check_subregs
+    ; CHECK: liveins: $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
+    ; CHECK-NEXT: [[LFSUX:%[0-9]+]]:f8rc, dead [[LFSUX1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSUX [[COPY]], [[COPY]]
+    ; CHECK-NEXT: undef [[FRSP:%[0-9]+]].sub_64:vslrc = FRSP [[LFSUX]], implicit $rm
+    ; CHECK-NEXT: [[XVCVDPSP:%[0-9]+]]:vrrc = XVCVDPSP [[FRSP]], implicit $rm
+    ; CHECK-NEXT: $v2 = COPY [[XVCVDPSP]]
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $v2
+    %0:g8rc_and_g8rc_nox0 = COPY $x3
+    %1:f8rc, %2:g8rc_and_g8rc_nox0 = LFSUX %0, %0
+    %3:f4rc = FRSP killed %1, implicit $rm
+    %4:vslrc = SUBREG_TO_REG 1, %3, %subreg.sub_64
+    %5:vrrc = XVCVDPSP killed %4, implicit $rm
+    $v2 = COPY %5
+    BLR8 implicit $lr8, implicit $rm, implicit $v2
+...
+
diff --git a/llvm/test/CodeGen/X86/subreg-fail.mir b/llvm/test/CodeGen/X86/subreg-fail.mir
new file mode 100644
index 0000000000000..c8146f099b814
--- /dev/null
+++ b/llvm/test/CodeGen/X86/subreg-fail.mir
@@ -0,0 +1,37 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple x86_64-unknown-unknown %s \
+# RUN:   -verify-coalescing -enable-subreg-liveness \
+# RUN:   --run-pass=register-coalescer -o - | FileCheck %s
+
+# Check that the register coalescer correctly handles merging live ranges over
+# SUBREG_TO_REG on X86. The -verify-coalescing option will give an error if
+# this is incorrect.
+
+---
+name:            test1
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test1
+    ; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+    ; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+    ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32rm1]], 32, implicit-def dead $eflags
+    ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = LEA64r [[MOV32rm1]], 1, [[MOV32rm]], 256, $noreg
+    ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = SHR64ri [[LEA64r]], 8, implicit-def dead $eflags
+    ; CHECK-NEXT: MOV32mr undef %10:gr64, 1, $noreg, 0, $noreg, [[LEA64r]].sub_32bit :: (volatile store (s32) into `ptr undef`)
+    ; CHECK-NEXT: RET 0, undef $eax
+    %0:gr32 = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+    %2:gr64_nosp = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+    %3:gr32 = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+    %5:gr64 = SUBREG_TO_REG 0, killed %3, %subreg.sub_32bit
+    %6:gr64 = COPY killed %5
+    %6:gr64 = SHL64ri %6, 32, implicit-def dead $eflags
+    %7:gr64 = LEA64r killed %6, 1, killed %2, 256, $noreg
+    %8:gr64 = COPY killed %7
+    %8:gr64 = SHR64ri %8, 8, implicit-def dead $eflags
+    %9:gr32 = COPY killed %8.sub_32bit
+    MOV32mr undef %10:gr64, 1, $noreg, 0, $noreg, killed %9 :: (volatile store (s32) into `ptr undef`)
+    RET 0, undef $eax
+
+...

``````````

</details>


https://github.com/llvm/llvm-project/pull/101071