[llvm] RegisterCoalescer: Fix creating full / empty subrange on undef subreg use (PR #117936)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 27 14:50:12 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-regalloc

Author: Matt Arsenault (arsenm)

<details>
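<summary>Changes</summary>

In `RegisterCoalescer::updateRegDefsUses`, skip the subregister-use handling for operands that are already marked `undef`. As the new test's comment explains, an undef use with a subregister index would otherwise end up introducing subranges for the empty and full lanemasks.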

---
Full diff: https://github.com/llvm/llvm-project/pull/117936.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/RegisterCoalescer.cpp (+1-1) 
- (added) llvm/test/CodeGen/AMDGPU/coalescer-undef-subreg-use-invalid-lanemask.mir (+59) 


``````````diff
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 2e1f498c090d1a..8dd1a0bf0837fb 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1866,7 +1866,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
 
       // A subreg use of a partially undef (super) register may be a complete
       // undef use now and then has to be marked that way.
-      if (MO.isUse() && !DstIsPhys) {
+      if (MO.isUse() && !MO.isUndef() && !DstIsPhys) {
         unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
         if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) {
           if (!DstInt->hasSubRanges()) {
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-undef-subreg-use-invalid-lanemask.mir b/llvm/test/CodeGen/AMDGPU/coalescer-undef-subreg-use-invalid-lanemask.mir
new file mode 100644
index 00000000000000..70b92831a0ecc4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-undef-subreg-use-invalid-lanemask.mir
@@ -0,0 +1,59 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s
+
+# Test that an invalid subreg range is not introduced due to the undef
+# %1.sub0 use. An undef use with a subregister index would end up
+# introducing subranges for the empty and full lanemasks.
+
+---
+name:            merge_with_undef_subreg_use_subrange_lanemask_is_invalid
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+  sgprForEXECCopy: '$sgpr100_sgpr101'
+body:             |
+  ; CHECK-LABEL: name: merge_with_undef_subreg_use_subrange_lanemask_is_invalid
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr8_sgpr9
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), addrspace 4)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   undef [[S_LOAD_DWORDX4_IMM:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   [[S_LOAD_DWORDX4_IMM:%[0-9]+]].sub1:sgpr_128 = COPY undef [[S_LOAD_DWORDX4_IMM]].sub0
+  ; CHECK-NEXT:   S_ENDPGM 0, implicit [[S_LOAD_DWORDX4_IMM]]
+  bb.0:
+    liveins: $sgpr8_sgpr9
+
+    %0:sgpr_64 = COPY $sgpr8_sgpr9
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), addrspace 4)
+
+  bb.1:
+    %2:sgpr_128 = COPY %1
+    S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
+    S_BRANCH %bb.2
+
+  bb.2:
+    undef %3.sub0:sgpr_128 = S_MOV_B32 0
+    %2:sgpr_128 = COPY killed %3
+
+  bb.3:
+    %4:sgpr_128 = COPY killed %2
+    %4.sub1:sgpr_128 = COPY undef %1.sub0
+    S_ENDPGM 0, implicit %4
+
+...

``````````

</details>


https://github.com/llvm/llvm-project/pull/117936


More information about the llvm-commits mailing list