[llvm] RegisterCoalescer: Fix implicit operand handling during rematerialize (PR #75271)

via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 12 18:09:50 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-regalloc

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

If the rematerialize was placing a subregister into a super register,
and implicit operands referenced the original register, we need to add
undef flags to the now-subregister indexed implicit operands.

Depends #<!-- -->75152

---
Full diff: https://github.com/llvm/llvm-project/pull/75271.diff


3 Files Affected:

- (modified) llvm/lib/CodeGen/RegisterCoalescer.cpp (+16-2) 
- (added) llvm/test/CodeGen/X86/coalescer-partial-redundancy-clear-dead-flag-undef-copy.mir (+47) 
- (added) llvm/test/CodeGen/X86/coalescer-remat-with-undef-implicit-def-operand.mir (+123) 


``````````diff
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index c067d87a9fd810..c1af37c8510ff5 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1201,6 +1201,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
                       << printMBBReference(MBB) << '\t' << CopyMI);
   }
 
+  const bool IsUndefCopy = CopyMI.getOperand(1).isUndef();
+
   // Remove CopyMI.
   // Note: This is fine to remove the copy before updating the live-ranges.
   // While updating the live-ranges, we only look at slot indices and
@@ -1214,6 +1216,19 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
   LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
                   &EndPoints);
   BValNo->markUnused();
+
+  if (IsUndefCopy) {
+    // We're introducing an undef phi def, and need to set undef on any users of
+    // the previously local def to avoid artifically extending the lifetime
+    // through the block.
+    for (MachineOperand &MO : MRI->use_nodbg_operands(IntB.reg())) {
+      const MachineInstr &MI = *MO.getParent();
+      SlotIndex UseIdx = LIS->getInstructionIndex(MI);
+      if (!IntB.liveAt(UseIdx))
+        MO.setIsUndef(true);
+    }
+  }
+
   // Extend IntB to the EndPoints of its original live interval.
   LIS->extendToIndices(IntB, EndPoints);
 
@@ -1596,8 +1611,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
         LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
   }
 
-  if (NewMI.getOperand(0).getSubReg())
-    NewMI.getOperand(0).setIsUndef();
+  NewMI.setRegisterDefReadUndef(NewMI.getOperand(0).getReg());
 
   // Transfer over implicit operands to the rematerialized instruction.
   for (MachineOperand &MO : ImplicitOps)
diff --git a/llvm/test/CodeGen/X86/coalescer-partial-redundancy-clear-dead-flag-undef-copy.mir b/llvm/test/CodeGen/X86/coalescer-partial-redundancy-clear-dead-flag-undef-copy.mir
new file mode 100644
index 00000000000000..5f33be0bc15559
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescer-partial-redundancy-clear-dead-flag-undef-copy.mir
@@ -0,0 +1,47 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=x86_64-pc-linux-gnu -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s
+
+# Check for "Live range continues after dead def flag".
+
+# There are 2 copies of undef, but the registers also appear to be
+# live due to block live outs, and thus were not deleted as
+# eliminateUndefCopy only considered the live range, and not the undef
+# flag.
+#
+# removePartialRedundancy would move the COPY undef %0 in bb.1 to
+# bb.0.  The live range of %1 would then be extended to be live out of
+# %bb.1 for the backedge phi. This would then fail the verifier, since
+# the dead flag was no longer valid. This was fixed by directly
+# considering the undef flag to avoid considering this special case.
+
+---
+name: partial_redundancy_coalesce_undef_copy_live_out
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: partial_redundancy_coalesce_undef_copy_live_out
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $rdi
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr32 = COPY $rdi
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead [[XOR32ri:%[0-9]+]]:gr32 = XOR32ri undef [[XOR32ri]], 1, implicit-def dead $eflags
+  ; CHECK-NEXT:   dead [[MOV32rr:%[0-9]+]]:gr32 = MOV32rr [[COPY]]
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  bb.0:
+    liveins: $rdi
+
+    %0:gr32 = COPY $rdi
+
+  bb.1:
+    %1:gr32 = COPY undef %0
+    dead %1:gr32 = XOR32ri %1, 1, implicit-def dead $eflags
+    dead %2:gr32 = MOV32rr killed %0
+    %0:gr32 = COPY killed undef %1
+    JMP_1 %bb.1
+
+...
diff --git a/llvm/test/CodeGen/X86/coalescer-remat-with-undef-implicit-def-operand.mir b/llvm/test/CodeGen/X86/coalescer-remat-with-undef-implicit-def-operand.mir
new file mode 100644
index 00000000000000..42d17130412b61
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescer-remat-with-undef-implicit-def-operand.mir
@@ -0,0 +1,123 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=x86_64-pc-linux-gnu -verify-coalescing -run-pass=register-coalescer -o - %s | FileCheck %s
+
+# The %1 = MOV32r0 is rematerialized as a subregister of %2. The
+# implicit-def %1 operand needs to have an undef added, just like the
+# main result operand.
+
+---
+name:           remat_into_subregister_set_undef_implicit_operand_subregisters
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: remat_into_subregister_set_undef_implicit_operand_subregisters
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $rdi
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+  ; CHECK-NEXT:   [[MOV32r0_1:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_1]]
+  ; CHECK-NEXT:   undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_2]].sub_32bit, implicit-def [[MOV32r0_2]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = XOR32ri [[MOV32r0_2]].sub_32bit, 1, implicit-def dead $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   JCC_1 %bb.4, 5, implicit killed undef $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead [[MOV32rr:%[0-9]+]]:gr32 = MOV32rr [[MOV32r0_1]]
+  ; CHECK-NEXT:   dead [[SHL64ri:%[0-9]+]]:gr64_nosp = SHL64ri [[MOV32r0_]], 4, implicit-def dead $eflags
+  ; CHECK-NEXT:   [[MOV32r0_1:%[0-9]+]]:gr32 = COPY [[MOV32r0_2]].sub_32bit
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  bb.0:
+    liveins: $rdi
+
+    undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
+    %1:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def %1
+    undef %2.sub_32bit:gr64_with_sub_8bit = COPY %1, implicit-def %2
+
+  bb.1:
+    %2.sub_32bit:gr64_with_sub_8bit = XOR32ri %2.sub_32bit, 1, implicit-def dead $eflags
+
+  bb.2:
+    JCC_1 %bb.4, 5, implicit killed undef $eflags
+
+  bb.3:
+
+  bb.4:
+    dead %3:gr32 = MOV32rr %1
+    dead %4:gr64_nosp = SHL64ri %0, 4, implicit-def dead $eflags
+    %1:gr32 = COPY %2.sub_32bit
+    JMP_1 %bb.1
+
+...
+
+# Same, except the implicit-def on the original instruction already
+# has a subregister index.
+
+---
+name:           remat_into_subregister_set_undef_implicit_operand_subregisters_with_subreg
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: remat_into_subregister_set_undef_implicit_operand_subregisters_with_subreg
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $rdi
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+  ; CHECK-NEXT:   [[MOV32r0_1:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_1]].sub_8bit
+  ; CHECK-NEXT:   undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_2]].sub_8bit, implicit-def [[MOV32r0_2]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = XOR32ri [[MOV32r0_2]].sub_32bit, 1, implicit-def dead $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   JCC_1 %bb.4, 5, implicit killed undef $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead [[MOV32rr:%[0-9]+]]:gr32 = MOV32rr [[MOV32r0_1]]
+  ; CHECK-NEXT:   dead [[SHL64ri:%[0-9]+]]:gr64_nosp = SHL64ri [[MOV32r0_]], 4, implicit-def dead $eflags
+  ; CHECK-NEXT:   [[MOV32r0_1:%[0-9]+]]:gr32 = COPY [[MOV32r0_2]].sub_32bit
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  bb.0:
+    liveins: $rdi
+
+    undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
+    %1:gr32 = MOV32r0 implicit-def dead $eflags, undef implicit-def %1.sub_8bit
+    undef %2.sub_32bit:gr64_with_sub_8bit = COPY %1, implicit-def %2
+
+  bb.1:
+    %2.sub_32bit:gr64_with_sub_8bit = XOR32ri %2.sub_32bit, 1, implicit-def dead $eflags
+
+  bb.2:
+    JCC_1 %bb.4, 5, implicit killed undef $eflags
+
+  bb.3:
+
+  bb.4:
+    dead %3:gr32 = MOV32rr %1
+    dead %4:gr64_nosp = SHL64ri %0, 4, implicit-def dead $eflags
+    %1:gr32 = COPY %2.sub_32bit
+    JMP_1 %bb.1
+
+...

``````````

</details>


https://github.com/llvm/llvm-project/pull/75271


More information about the llvm-commits mailing list