[llvm] 26fd693 - RegisterCoalescer: Fix creating full / empty subrange on undef subreg use (#117936)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 28 08:12:22 PST 2024


Author: Matt Arsenault
Date: 2024-11-28T11:12:19-05:00
New Revision: 26fd693b979f17c83cbd5a3313fdea950ce3d356

URL: https://github.com/llvm/llvm-project/commit/26fd693b979f17c83cbd5a3313fdea950ce3d356
DIFF: https://github.com/llvm/llvm-project/commit/26fd693b979f17c83cbd5a3313fdea950ce3d356.diff

LOG: RegisterCoalescer: Fix creating full / empty subrange on undef subreg use (#117936)

Added: 
    llvm/test/CodeGen/AMDGPU/coalescer-undef-subreg-use-invalid-lanemask.mir

Modified: 
    llvm/lib/CodeGen/RegisterCoalescer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 073ce367af1b85..5de873fd41578e 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1888,7 +1888,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
 
       // A subreg use of a partially undef (super) register may be a complete
       // undef use now and then has to be marked that way.
-      if (MO.isUse() && !DstIsPhys) {
+      if (MO.isUse() && !MO.isUndef() && !DstIsPhys) {
         unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
         if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) {
           if (!DstInt->hasSubRanges()) {

diff  --git a/llvm/test/CodeGen/AMDGPU/coalescer-undef-subreg-use-invalid-lanemask.mir b/llvm/test/CodeGen/AMDGPU/coalescer-undef-subreg-use-invalid-lanemask.mir
new file mode 100644
index 00000000000000..70b92831a0ecc4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-undef-subreg-use-invalid-lanemask.mir
@@ -0,0 +1,59 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s
+
+# Test that an invalid subreg range is not introduced due to the undef
+# %1.sub0 use. An undef use with a subregister index would end up
+# introducing subranges for the empty and full lanemasks.
+
+---
+name:            merge_with_undef_subreg_use_subrange_lanemask_is_invalid
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+  sgprForEXECCopy: '$sgpr100_sgpr101'
+body:             |
+  ; CHECK-LABEL: name: merge_with_undef_subreg_use_subrange_lanemask_is_invalid
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr8_sgpr9
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), addrspace 4)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   undef [[S_LOAD_DWORDX4_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   [[S_LOAD_DWORDX4_IMM:%[0-9]+]].sub1:sgpr_128 = COPY undef [[S_LOAD_DWORDX4_IMM]].sub0
+  ; CHECK-NEXT:   S_ENDPGM 0, implicit [[S_LOAD_DWORDX4_IMM]]
+  bb.0:
+    liveins: $sgpr8_sgpr9
+
+    %0:sgpr_64 = COPY $sgpr8_sgpr9
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), addrspace 4)
+
+  bb.1:
+    %2:sgpr_128 = COPY %1
+    S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
+    S_BRANCH %bb.2
+
+  bb.2:
+    undef %3.sub0:sgpr_128 = S_MOV_B32 0
+    %2:sgpr_128 = COPY killed %3
+
+  bb.3:
+    %4:sgpr_128 = COPY killed %2
+    %4.sub1:sgpr_128 = COPY undef %1.sub0
+    S_ENDPGM 0, implicit %4
+
+...


        


More information about the llvm-commits mailing list