[llvm] [RegisterCoalescer] Fix issue in the RegisterCoalescer. (PR #96839)

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 27 11:36:00 PDT 2024


https://github.com/stefanp-ibm updated https://github.com/llvm/llvm-project/pull/96839

>From bffd35e26a9c15c4b912241ec677f2fa4f821217 Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <stefanp at ca.ibm.com>
Date: Wed, 26 Jun 2024 21:38:26 -0500
Subject: [PATCH 1/2] [RegisterCoalescer] Fix issue in the RegisterCoalescer.

Two tests are added to this fix. The X86 test fails without the patch.
The PowerPC test passes with and without the patch but is added as a way
track future possible failures when register classes are changed in a
future patch.
---
 llvm/lib/CodeGen/RegisterCoalescer.cpp        |   8 ++
 .../test/CodeGen/PowerPC/subreg-coalescer.mir |  26 ++++
 llvm/test/CodeGen/X86/subreg-fail.mir         | 124 ++++++++++++++++++
 3 files changed, 158 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
 create mode 100644 llvm/test/CodeGen/X86/subreg-fail.mir

diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 3397cb4f3661f..75cdcf811fd40 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -3671,6 +3671,14 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
     // having stale segments.
     LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
 
+    LHSVals.pruneSubRegValues(LHS, ShrinkMask);
+    RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+  } else if (TrackSubRegLiveness && !CP.getDstIdx() && CP.getSrcIdx()) {
+    LHS.createSubRangeFrom(LIS->getVNInfoAllocator(),
+                           CP.getNewRC()->getLaneMask(), LHS);
+    mergeSubRangeInto(LHS, RHS, TRI->getSubRegIndexLaneMask(CP.getSrcIdx()), CP,
+                      CP.getDstIdx());
+    LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
     LHSVals.pruneSubRegValues(LHS, ShrinkMask);
     RHSVals.pruneSubRegValues(LHS, ShrinkMask);
   }
diff --git a/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
new file mode 100644
index 0000000000000..8b04997ff335f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
@@ -0,0 +1,26 @@
+# RUN: llc -mtriple powerpc64le-unknown-linux-gnu -mcpu=pwr8 -x mir < %s \
+# RUN:   -verify-machineinstrs --run-pass=register-coalescer -o - | FileCheck %s
+
+---
+name: check_subregs
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x3
+
+    %0:g8rc_and_g8rc_nox0 = COPY $x3
+    %3:f8rc, %4:g8rc_and_g8rc_nox0 = LFSUX %0, %0
+    %5:f4rc = FRSP killed %3, implicit $rm
+    %22:vslrc = SUBREG_TO_REG 1, %5, %subreg.sub_64
+    %11:vrrc = XVCVDPSP killed %22, implicit $rm
+    $v2 = COPY %11
+    BLR8 implicit $lr8, implicit $rm, implicit $v2
+...
+
+# CHECK:         %0:g8rc_and_g8rc_nox0 = COPY $x3
+# CHECK-NEXT:    %1:f8rc, dead %2:g8rc_and_g8rc_nox0 = LFSUX %0, %0
+# CHECK-NEXT:    undef %4.sub_64:vslrc = FRSP %1, implicit $rm
+# CHECK-NEXT:    %5:vrrc = XVCVDPSP %4, implicit $rm
+# CHECK-NEXT:    $v2 = COPY %5
+# CHECK-NEXT:    BLR8 implicit $lr8, implicit $rm, implicit $v2
diff --git a/llvm/test/CodeGen/X86/subreg-fail.mir b/llvm/test/CodeGen/X86/subreg-fail.mir
new file mode 100644
index 0000000000000..a563141d59c9f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/subreg-fail.mir
@@ -0,0 +1,124 @@
+# RUN: llc -mtriple x86_64-unknown-unknown -x mir < %s \
+# RUN:   -verify-machineinstrs -enable-subreg-liveness=true \
+# RUN:   --run-pass=register-coalescer -o - | FileCheck %s
+
+--- |
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-unknown"
+  
+  %pair = type { i64, double }
+  %t21 = type { ptr }
+  %t13 = type { ptr, %t15, %t15 }
+  %t15 = type { i8, i32, i32 }
+  
+  @__force_order = external hidden global i32, align 4
+  @.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32
+  @a = external global i32, align 4
+  @fn1.g = private unnamed_addr constant [9 x ptr] [ptr null, ptr @a, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null], align 16
+  @e = external global i32, align 4
+  @__stack_chk_guard = external dso_local global ptr
+  
+  ; Function Attrs: nounwind ssp
+  define i32 @test1() #0 {
+  entry:
+    %tmp5.i = load volatile i32, ptr undef, align 4
+    %conv.i = zext i32 %tmp5.i to i64
+    %tmp12.i = load volatile i32, ptr undef, align 4
+    %conv13.i = zext i32 %tmp12.i to i64
+    %shl.i = shl i64 %conv13.i, 32
+    %or.i = or i64 %shl.i, %conv.i
+    %add16.i = add i64 %or.i, 256
+    %shr.i = lshr i64 %add16.i, 8
+    %conv19.i = trunc i64 %shr.i to i32
+    store volatile i32 %conv19.i, ptr undef, align 4
+    ret i32 undef
+  }
+...
+---
+name:            test1
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   true
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gr32, preferred-register: '' }
+  - { id: 1, class: gr64, preferred-register: '' }
+  - { id: 2, class: gr64_nosp, preferred-register: '' }
+  - { id: 3, class: gr32, preferred-register: '' }
+  - { id: 4, class: gr64, preferred-register: '' }
+  - { id: 5, class: gr64, preferred-register: '' }
+  - { id: 6, class: gr64, preferred-register: '' }
+  - { id: 7, class: gr64, preferred-register: '' }
+  - { id: 8, class: gr64, preferred-register: '' }
+  - { id: 9, class: gr32, preferred-register: '' }
+  - { id: 10, class: gr64, preferred-register: '' }
+  - { id: 11, class: gr32, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo:
+  amxProgModel:    None
+body:             |
+  bb.0.entry:
+    %0:gr32 = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+    %2:gr64_nosp = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
+    %3:gr32 = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+    %5:gr64 = SUBREG_TO_REG 0, killed %3, %subreg.sub_32bit
+    %6:gr64 = COPY killed %5
+    %6:gr64 = SHL64ri %6, 32, implicit-def dead $eflags
+    %7:gr64 = LEA64r killed %6, 1, killed %2, 256, $noreg
+    %8:gr64 = COPY killed %7
+    %8:gr64 = SHR64ri %8, 8, implicit-def dead $eflags
+    %9:gr32 = COPY killed %8.sub_32bit
+    MOV32mr undef %10:gr64, 1, $noreg, 0, $noreg, killed %9 :: (volatile store (s32) into `ptr undef`)
+    RET 0, undef $eax
+
+...
+
+# CHECK:         undef %2.sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+# CHECK-NEXT:    undef %6.sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+# CHECK-NEXT:    %6:gr64_with_sub_8bit = SHL64ri %6, 32, implicit-def dead $eflags
+# CHECK-NEXT:    %8:gr64_with_sub_8bit = LEA64r %6, 1, %2, 256, $noreg
+# CHECK-NEXT:    %8:gr64_with_sub_8bit = SHR64ri %8, 8, implicit-def dead $eflags
+# CHECK-NEXT:    MOV32mr undef %10:gr64, 1, $noreg, 0, $noreg, %8.sub_32bit :: (volatile store (s32) into `ptr undef`)
+# CHECK-NEXT:    RET 0, undef $eax
+

>From 08baca3725d563bdccec56d98d11651f37ef94f3 Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <stefanp at ca.ibm.com>
Date: Thu, 27 Jun 2024 13:34:24 -0500
Subject: [PATCH 2/2] Updated and cleaned up test cases.

---
 llvm/lib/CodeGen/RegisterCoalescer.cpp        |   1 -
 .../test/CodeGen/PowerPC/subreg-coalescer.mir |  30 +++--
 llvm/test/CodeGen/X86/subreg-fail.mir         | 113 ++----------------
 3 files changed, 28 insertions(+), 116 deletions(-)

diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 75cdcf811fd40..e406ba0305dad 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -3680,7 +3680,6 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
                       CP.getDstIdx());
     LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
     LHSVals.pruneSubRegValues(LHS, ShrinkMask);
-    RHSVals.pruneSubRegValues(LHS, ShrinkMask);
   }
 
   // The merging algorithm in LiveInterval::join() can't handle conflicting
diff --git a/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
index 8b04997ff335f..f58a0d0cd96a6 100644
--- a/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
+++ b/llvm/test/CodeGen/PowerPC/subreg-coalescer.mir
@@ -1,26 +1,30 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple powerpc64le-unknown-linux-gnu -mcpu=pwr8 -x mir < %s \
-# RUN:   -verify-machineinstrs --run-pass=register-coalescer -o - | FileCheck %s
+# RUN:   -verify-coalescing --run-pass=register-coalescer -o - | FileCheck %s
 
 ---
 name: check_subregs
 alignment:       16
 tracksRegLiveness: true
 body:             |
-  bb.0.entry:
+  bb.0:
     liveins: $x3
 
+    ; CHECK-LABEL: name: check_subregs
+    ; CHECK: liveins: $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
+    ; CHECK-NEXT: [[LFSUX:%[0-9]+]]:f8rc, dead [[LFSUX1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSUX [[COPY]], [[COPY]]
+    ; CHECK-NEXT: undef [[FRSP:%[0-9]+]].sub_64:vslrc = FRSP [[LFSUX]], implicit $rm
+    ; CHECK-NEXT: [[XVCVDPSP:%[0-9]+]]:vrrc = XVCVDPSP [[FRSP]], implicit $rm
+    ; CHECK-NEXT: $v2 = COPY [[XVCVDPSP]]
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $v2
     %0:g8rc_and_g8rc_nox0 = COPY $x3
-    %3:f8rc, %4:g8rc_and_g8rc_nox0 = LFSUX %0, %0
-    %5:f4rc = FRSP killed %3, implicit $rm
-    %22:vslrc = SUBREG_TO_REG 1, %5, %subreg.sub_64
-    %11:vrrc = XVCVDPSP killed %22, implicit $rm
-    $v2 = COPY %11
+    %1:f8rc, %2:g8rc_and_g8rc_nox0 = LFSUX %0, %0
+    %3:f4rc = FRSP killed %1, implicit $rm
+    %4:vslrc = SUBREG_TO_REG 1, %3, %subreg.sub_64
+    %5:vrrc = XVCVDPSP killed %4, implicit $rm
+    $v2 = COPY %5
     BLR8 implicit $lr8, implicit $rm, implicit $v2
 ...
 
-# CHECK:         %0:g8rc_and_g8rc_nox0 = COPY $x3
-# CHECK-NEXT:    %1:f8rc, dead %2:g8rc_and_g8rc_nox0 = LFSUX %0, %0
-# CHECK-NEXT:    undef %4.sub_64:vslrc = FRSP %1, implicit $rm
-# CHECK-NEXT:    %5:vrrc = XVCVDPSP %4, implicit $rm
-# CHECK-NEXT:    $v2 = COPY %5
-# CHECK-NEXT:    BLR8 implicit $lr8, implicit $rm, implicit $v2
diff --git a/llvm/test/CodeGen/X86/subreg-fail.mir b/llvm/test/CodeGen/X86/subreg-fail.mir
index a563141d59c9f..7e91bbf350866 100644
--- a/llvm/test/CodeGen/X86/subreg-fail.mir
+++ b/llvm/test/CodeGen/X86/subreg-fail.mir
@@ -1,104 +1,22 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple x86_64-unknown-unknown -x mir < %s \
-# RUN:   -verify-machineinstrs -enable-subreg-liveness=true \
+# RUN:   -verify-coalescing -enable-subreg-liveness=true \
 # RUN:   --run-pass=register-coalescer -o - | FileCheck %s
 
---- |
-  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-  target triple = "x86_64-unknown-unknown"
-  
-  %pair = type { i64, double }
-  %t21 = type { ptr }
-  %t13 = type { ptr, %t15, %t15 }
-  %t15 = type { i8, i32, i32 }
-  
-  @__force_order = external hidden global i32, align 4
-  @.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32
-  @a = external global i32, align 4
-  @fn1.g = private unnamed_addr constant [9 x ptr] [ptr null, ptr @a, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null], align 16
-  @e = external global i32, align 4
-  @__stack_chk_guard = external dso_local global ptr
-  
-  ; Function Attrs: nounwind ssp
-  define i32 @test1() #0 {
-  entry:
-    %tmp5.i = load volatile i32, ptr undef, align 4
-    %conv.i = zext i32 %tmp5.i to i64
-    %tmp12.i = load volatile i32, ptr undef, align 4
-    %conv13.i = zext i32 %tmp12.i to i64
-    %shl.i = shl i64 %conv13.i, 32
-    %or.i = or i64 %shl.i, %conv.i
-    %add16.i = add i64 %or.i, 256
-    %shr.i = lshr i64 %add16.i, 8
-    %conv19.i = trunc i64 %shr.i to i32
-    store volatile i32 %conv19.i, ptr undef, align 4
-    ret i32 undef
-  }
-...
 ---
 name:            test1
 alignment:       16
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
 tracksRegLiveness: true
-hasWinCFI:       false
-callsEHReturn:   false
-callsUnwindInit: false
-hasEHCatchret:   false
-hasEHScopes:     false
-hasEHFunclets:   false
-isOutlined:      false
-debugInstrRef:   true
-failsVerification: false
-tracksDebugUserValues: false
-registers:
-  - { id: 0, class: gr32, preferred-register: '' }
-  - { id: 1, class: gr64, preferred-register: '' }
-  - { id: 2, class: gr64_nosp, preferred-register: '' }
-  - { id: 3, class: gr32, preferred-register: '' }
-  - { id: 4, class: gr64, preferred-register: '' }
-  - { id: 5, class: gr64, preferred-register: '' }
-  - { id: 6, class: gr64, preferred-register: '' }
-  - { id: 7, class: gr64, preferred-register: '' }
-  - { id: 8, class: gr64, preferred-register: '' }
-  - { id: 9, class: gr32, preferred-register: '' }
-  - { id: 10, class: gr64, preferred-register: '' }
-  - { id: 11, class: gr32, preferred-register: '' }
-liveins:         []
-frameInfo:
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    1
-  adjustsStack:    false
-  hasCalls:        false
-  stackProtector:  ''
-  functionContext: ''
-  maxCallFrameSize: 4294967295
-  cvBytesOfCalleeSavedRegisters: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-  hasTailCall:     false
-  isCalleeSavedInfoValid: false
-  localFrameSize:  0
-  savePoint:       ''
-  restorePoint:    ''
-fixedStack:      []
-stack:           []
-entry_values:    []
-callSites:       []
-debugValueSubstitutions: []
-constants:       []
-machineFunctionInfo:
-  amxProgModel:    None
 body:             |
-  bb.0.entry:
+  bb.0:
+    ; CHECK-LABEL: name: test1
+    ; CHECK: undef [[MOV32rm:%[0-9]+]].sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+    ; CHECK-NEXT: undef [[MOV32rm1:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
+    ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr64_with_sub_8bit = SHL64ri [[MOV32rm1]], 32, implicit-def dead $eflags
+    ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = LEA64r [[MOV32rm1]], 1, [[MOV32rm]], 256, $noreg
+    ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_with_sub_8bit = SHR64ri [[LEA64r]], 8, implicit-def dead $eflags
+    ; CHECK-NEXT: MOV32mr undef %10:gr64, 1, $noreg, 0, $noreg, [[LEA64r]].sub_32bit :: (volatile store (s32) into `ptr undef`)
+    ; CHECK-NEXT: RET 0, undef $eax
     %0:gr32 = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
     %2:gr64_nosp = SUBREG_TO_REG 0, killed %0, %subreg.sub_32bit
     %3:gr32 = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
@@ -113,12 +31,3 @@ body:             |
     RET 0, undef $eax
 
 ...
-
-# CHECK:         undef %2.sub_32bit:gr64_nosp = MOV32rm undef %1:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
-# CHECK-NEXT:    undef %6.sub_32bit:gr64_with_sub_8bit = MOV32rm undef %4:gr64, 1, $noreg, 0, $noreg :: (volatile load (s32) from `ptr undef`)
-# CHECK-NEXT:    %6:gr64_with_sub_8bit = SHL64ri %6, 32, implicit-def dead $eflags
-# CHECK-NEXT:    %8:gr64_with_sub_8bit = LEA64r %6, 1, %2, 256, $noreg
-# CHECK-NEXT:    %8:gr64_with_sub_8bit = SHR64ri %8, 8, implicit-def dead $eflags
-# CHECK-NEXT:    MOV32mr undef %10:gr64, 1, $noreg, 0, $noreg, %8.sub_32bit :: (volatile store (s32) into `ptr undef`)
-# CHECK-NEXT:    RET 0, undef $eax
-



More information about the llvm-commits mailing list