[llvm] e33e087 - [MachineSink] Update register dependency correctly (#109763)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 24 17:42:43 PDT 2024
Author: Ruiling, Song
Date: 2024-09-25T08:42:40+08:00
New Revision: e33e087a175567e88b361fd198536314a0a1fff2
URL: https://github.com/llvm/llvm-project/commit/e33e087a175567e88b361fd198536314a0a1fff2
DIFF: https://github.com/llvm/llvm-project/commit/e33e087a175567e88b361fd198536314a0a1fff2.diff
LOG: [MachineSink] Update register dependency correctly (#109763)
The accumulateUsedDefed() was missing if block prologue interference
check does not pass. This would cause incorrect register dependency,
which cause incorrect sinking.
Added:
llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir
Modified:
llvm/lib/CodeGen/MachineSink.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 609f9af9767f5d..658ebd47488c71 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -2152,8 +2152,9 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
MachineBasicBlock::iterator InsertPos =
SuccBB->SkipPHIsAndLabels(SuccBB->begin());
if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
- LLVM_DEBUG(
- dbgs() << " *** Not sinking: prologue interference\n");
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+ TRI);
+ LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
continue;
}
diff --git a/llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir b/llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir
new file mode 100644
index 00000000000000..14617e066f954c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck %s
+#
+# In the example, the ` $sgpr4 = COPY $sgpr2` was incorrectly sunk into bb.3. This happened because we did not update
+# register uses when we found that `$sgpr2 = COPY $sgpr3` should not be sunk because of conflict with the successor's
+# prologue instructions.
+---
+name: update_dependency_correctly
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: update_dependency_correctly
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $sgpr0, $sgpr3, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr4 = COPY $sgpr2
+ ; CHECK-NEXT: renamable $sgpr2 = COPY $sgpr3
+ ; CHECK-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, $vgpr1
+ ; CHECK-NEXT: $sgpr1 = S_AND_SAVEEXEC_B32 $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: S_ENDPGM 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0, $sgpr2, $sgpr4, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0
+ ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64_pseudo
+ ; CHECK-NEXT: renamable $sgpr5 = COPY $sgpr1
+ ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 32, 0
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ bb.0:
+ successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $sgpr3, $sgpr2
+
+ $vgpr1 = IMPLICIT_DEF
+
+ renamable $sgpr4 = COPY $sgpr2
+ renamable $sgpr2 = COPY $sgpr3
+
+ $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, $vgpr1
+
+ $sgpr1 = S_AND_SAVEEXEC_B32 $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.2:
+ S_ENDPGM 0
+
+ bb.3:
+ successors: %bb.2(0x40000000)
+ liveins: $sgpr0, $sgpr2, $sgpr4, $vgpr1
+
+ $sgpr3 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0
+
+ renamable $sgpr0_sgpr1 = S_GETPC_B64_pseudo
+ renamable $sgpr5 = COPY $sgpr1
+ renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 32, 0
+
+ S_BRANCH %bb.2
+
+...
More information about the llvm-commits
mailing list