[llvm] 39fbb5c - RegisterCoalescer: Fix not setting undef on coalesced subregister uses
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 3 10:54:49 PST 2021
Author: Matt Arsenault
Date: 2021-02-03T13:54:43-05:00
New Revision: 39fbb5c3e307ac06c7ca83aca8e3c76ed99b25f3
URL: https://github.com/llvm/llvm-project/commit/39fbb5c3e307ac06c7ca83aca8e3c76ed99b25f3
DIFF: https://github.com/llvm/llvm-project/commit/39fbb5c3e307ac06c7ca83aca8e3c76ed99b25f3.diff
LOG: RegisterCoalescer: Fix not setting undef on coalesced subregister uses
This was only adding undef to the use if the copy itself had a
subregister index. It did not consider the subrange liveness if the
use had a subreg index to begin with.
Added:
llvm/test/CodeGen/AMDGPU/undef-subreg-use-after-coalesce.mir
Modified:
llvm/lib/CodeGen/RegisterCoalescer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 6b87681528ee..5f770d6b0bc0 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1768,24 +1768,27 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
- if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
- if (!DstInt->hasSubRanges()) {
- BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
- LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
- LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
- DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
- // The unused lanes are just empty live-ranges at this point.
- // It is the caller responsibility to set the proper
- // dead segments if there is an actual dead def of the
- // unused lanes. This may happen with rematerialization.
- DstInt->createSubRange(Allocator, UnusedLanes);
+ if (MO.isUse() && !DstIsPhys) {
+ unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
+ if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ if (!DstInt->hasSubRanges()) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
+ DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
+ // The unused lanes are just empty live-ranges at this point.
+ // It is the caller responsibility to set the proper
+ // dead segments if there is an actual dead def of the
+ // unused lanes. This may happen with rematerialization.
+ DstInt->createSubRange(Allocator, UnusedLanes);
+ }
+ SlotIndex MIIdx = UseMI->isDebugValue()
+ ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
+ : LIS->getInstructionIndex(*UseMI);
+ SlotIndex UseIdx = MIIdx.getRegSlot(true);
+ addUndefFlag(*DstInt, UseIdx, MO, SubUseIdx);
}
- SlotIndex MIIdx = UseMI->isDebugValue()
- ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
- : LIS->getInstructionIndex(*UseMI);
- SlotIndex UseIdx = MIIdx.getRegSlot(true);
- addUndefFlag(*DstInt, UseIdx, MO, SubIdx);
}
if (DstIsPhys)
diff --git a/llvm/test/CodeGen/AMDGPU/undef-subreg-use-after-coalesce.mir b/llvm/test/CodeGen/AMDGPU/undef-subreg-use-after-coalesce.mir
new file mode 100644
index 000000000000..c60d8297f57d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/undef-subreg-use-after-coalesce.mir
@@ -0,0 +1,81 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -verify-coalescing -run-pass=simple-register-coalescing -o - %s | FileCheck %s
+
+# The copy from %0 to %1 introduces liveness for %1.sub2. After
+# coalescing, the use of %1.sub2 needs to be marked undef. The
+# subregless copy previously did not consider the existing subregister
+# on the use operand.
+
+---
+name: undef_subreg_use_after_full_copy_coalesce_0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: undef_subreg_use_after_full_copy_coalesce_0
+ ; CHECK: undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: dead %0.sub1:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: S_ENDPGM 0, implicit undef %0.sub2
+ undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+ %0.sub1:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+ %1:vreg_96 = COPY killed %0
+ S_ENDPGM 0, implicit %1.sub2
+
+...
+
+# Same, except coalesced copy has a subregister index that needs to be
+# composed with the use index.
+---
+name: undef_subreg_use_after_full_copy_coalesce_composed
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: undef_subreg_use_after_full_copy_coalesce_composed
+ ; CHECK: undef %0.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: dead %0.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: S_ENDPGM 0, implicit undef %2.sub1:vreg_64
+ undef %0.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
+ %0.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec
+ %1:vreg_128 = COPY killed %0
+ %2:vreg_64 = COPY killed %1.sub2_sub3
+ S_ENDPGM 0, implicit %2.sub1
+
+...
+
+# FIXME: Initial computed range is wrong for %0.sub2_sub3 and fails
+# verifier.
+# ---
+# name: undef_subreg_use_after_full_copy_coalesce_composed2
+# tracksRegLiveness: true
+# body: |
+# bb.0:
+# undef %0.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
+# %0.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec
+# %1:vreg_128 = COPY killed %0.sub2_sub3
+# S_ENDPGM 0, implicit %1.sub1
+
+# ...
+
+---
+name: undef_subreg_use_after_full_copy_coalesce_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: undef_subreg_use_after_full_copy_coalesce_1
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK: undef %2.sub0:vreg_96 = COPY $vgpr0
+ ; CHECK: %2.sub1:vreg_96 = COPY $vgpr1
+ ; CHECK: S_NOP 0, implicit undef %2.sub2
+ ; CHECK: S_NOP 0, implicit %2.sub1
+ ; CHECK: S_ENDPGM 0
+ %0:vgpr_32 = COPY killed $vgpr0
+ %1:vgpr_32 = COPY killed $vgpr1
+ undef %2.sub0:vreg_96 = COPY killed %0
+ %2.sub1:vreg_96 = COPY killed %1
+ %3:vreg_96 = COPY killed %2
+ S_NOP 0, implicit %3.sub2
+ S_NOP 0, implicit %3.sub1
+ S_ENDPGM 0
+
+...
More information about the llvm-commits
mailing list