[llvm] 8e64d84 - [MachineSink] Check block prologue interference
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 21 19:16:51 PDT 2022
Author: Carl Ritson
Date: 2022-03-22T11:15:37+09:00
New Revision: 8e64d84995ddb317f8c79825069a3d2b0005551a
URL: https://github.com/llvm/llvm-project/commit/8e64d84995ddb317f8c79825069a3d2b0005551a
DIFF: https://github.com/llvm/llvm-project/commit/8e64d84995ddb317f8c79825069a3d2b0005551a.diff
LOG: [MachineSink] Check block prologue interference
Sinking must check for interference between the block prologue
and the instruction being sunk.
Specifically check for clobbering of uses by the prologue, and
overwrites to prologue defined registers by the sunk instruction.
Reviewed By: rampitec, ruiling
Differential Revision: https://reviews.llvm.org/D121277
Added:
Modified:
llvm/lib/CodeGen/MachineSink.cpp
llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 995caae1599f4..966b012725d81 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -1294,6 +1294,45 @@ bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
return true;
}
+/// Return true if a target defined block prologue instruction interferes
+/// with a sink candidate \p MI that would be placed at \p End in \p BB; \p MRI may be null, in which case the constant-physreg exemption below is not applied.
+static bool blockPrologueInterferes(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator End,
+ MachineInstr &MI,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ const MachineRegisterInfo *MRI) {
+ if (BB->begin() == End)
+ return false; // no prologue: the insertion point is the very start of the block
+ for (MachineBasicBlock::iterator PI = BB->getFirstNonPHI(); PI != End; ++PI) { // scan every instruction from the first non-PHI up to (not including) End
+ // Only check target defined prologue instructions
+ if (!TII->isBasicBlockPrologue(*PI))
+ continue;
+ for (auto &MO : MI.operands()) { // compare each register operand of the sink candidate against this prologue instruction
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue; // operand carries no register
+ if (MO.isUse()) { // use operand: the prologue must not modify a register MI reads
+ if (Register::isPhysicalRegister(Reg) &&
+ (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
+ continue; // physical-register uses the target marks ignorable, or that MRI reports as constant, are exempt
+ if (PI->modifiesRegister(Reg, TRI))
+ return true; // prologue clobbers a register MI uses
+ } else { // def operand: MI must not overwrite a register the prologue reads or defines
+ if (PI->readsRegister(Reg, TRI))
+ return true; // prologue reads a register MI would define
+ // Check for interference with non-dead defs
+ auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI); // NOTE(review): bool arguments are presumably isDead=false, Overlap=true - confirm against MachineInstr.h
+ if (DefOp && !DefOp->isDead())
+ return true; // prologue has a non-dead def of the register found for Reg
+ }
+ }
+ }
+ return false; // no prologue instruction conflicts with any register operand of MI
+}
+
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
@@ -1407,6 +1446,10 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// Determine where to insert into. Skip phi nodes.
MachineBasicBlock::iterator InsertPos =
SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
+ if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) {
+ LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
+ return false;
+ }
// Collect debug users of any vreg that this inst defines.
SmallVector<MIRegs, 4> DbgUsersToSink;
@@ -1805,11 +1848,19 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
}
auto DbgValsToSink = DbgValsToSinkMap.takeVector();
+ LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);
+
+ MachineBasicBlock::iterator InsertPos =
+ SuccBB->SkipPHIsAndLabels(SuccBB->begin());
+ if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
+ LLVM_DEBUG(
+ dbgs() << " *** Not sinking: prologue interference\n");
+ continue;
+ }
+
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
- MachineBasicBlock::iterator InsertPos =
- SuccBB->SkipPHIsAndLabels(SuccBB->begin());
performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
diff --git a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
index fee84198afccd..16edbf1d83378 100644
--- a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
+++ b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
@@ -5,6 +5,7 @@
# past block prologues which would overwrite their uses.
---
+# Make sure COPY to $sgpr9 is not sunk after S_AND_SAVEEXEC_B64.
name: _amdgpu_ps_main
alignment: 1
tracksRegLiveness: true
@@ -17,16 +18,16 @@ body: |
; GFX10-NEXT: successors: %bb.1(0x80000000)
; GFX10-NEXT: liveins: $sgpr4
; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4
; GFX10-NEXT: renamable $vgpr5 = IMPLICIT_DEF
; GFX10-NEXT: renamable $sgpr0_sgpr1 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, $vgpr5, 0, implicit $mode, implicit $exec
; GFX10-NEXT: S_BRANCH %bb.1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: bb.1:
; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GFX10-NEXT: liveins: $sgpr4:0x0000000000000003, $sgpr6, $sgpr0_sgpr1
+ ; GFX10-NEXT: liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4
; GFX10-NEXT: renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX10-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
; GFX10-NEXT: S_BRANCH %bb.2
@@ -70,3 +71,143 @@ body: |
S_ENDPGM 0
...
+---
+# Make sure COPY to $sgpr0_sgpr1 is not sunk after S_AND_SAVEEXEC_B64.
+name: _amdgpu_ps_main2
+alignment: 1
+tracksRegLiveness: true
+registers: []
+liveins:
+ - { reg: '$sgpr4', virtual-reg: '' }
+ - { reg: '$sgpr6_sgpr7', virtual-reg: '' }
+body: |
+ ; GFX10-LABEL: name: _amdgpu_ps_main2
+ ; GFX10: bb.0:
+ ; GFX10-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr6_sgpr7
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4
+ ; GFX10-NEXT: renamable $vgpr5 = IMPLICIT_DEF
+ ; GFX10-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr6_sgpr7
+ ; GFX10-NEXT: S_BRANCH %bb.1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.1:
+ ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX10-NEXT: liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+ ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
+ ; GFX10-NEXT: S_BRANCH %bb.2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: successors: %bb.3(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr6
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $m0 = COPY killed renamable $sgpr6
+ ; GFX10-NEXT: S_BRANCH %bb.3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.3:
+ ; GFX10-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x80000000)
+ liveins: $sgpr4, $sgpr6_sgpr7
+
+ renamable $sgpr9 = COPY $sgpr4
+ renamable $vgpr5 = IMPLICIT_DEF
+ renamable $sgpr0_sgpr1 = COPY $sgpr6_sgpr7
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x40000000), %bb.8(0x40000000)
+ liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1
+
+ $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
+ renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+ S_CBRANCH_EXECZ %bb.8, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.8(0x40000000)
+ liveins: $sgpr6
+
+ $m0 = COPY killed renamable $sgpr6
+ S_BRANCH %bb.8
+
+ bb.8:
+
+ S_ENDPGM 0
+
+...
+---
+# Make sure COPY to $sgpr2_sgpr3 is not sunk after S_AND_SAVEEXEC_B32.
+name: _amdgpu_ps_main3
+alignment: 1
+tracksRegLiveness: true
+registers: []
+liveins:
+ - { reg: '$sgpr6_sgpr7', virtual-reg: '' }
+ - { reg: '$sgpr8', virtual-reg: '' }
+body: |
+ ; GFX10-LABEL: name: _amdgpu_ps_main3
+ ; GFX10: bb.0:
+ ; GFX10-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr6_sgpr7, $sgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: renamable $vgpr5 = IMPLICIT_DEF
+ ; GFX10-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+ ; GFX10-NEXT: renamable $sgpr2_sgpr3 = COPY $sgpr6_sgpr7
+ ; GFX10-NEXT: S_BRANCH %bb.1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.1:
+ ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX10-NEXT: liveins: $sgpr6, $sgpr8, $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr2 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+ ; GFX10-NEXT: S_NOP 0, implicit $sgpr2_sgpr3
+ ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
+ ; GFX10-NEXT: S_BRANCH %bb.2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: successors: %bb.3(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr6
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $m0 = COPY killed renamable $sgpr6
+ ; GFX10-NEXT: S_BRANCH %bb.3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.3:
+ ; GFX10-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x80000000)
+ liveins: $sgpr6_sgpr7, $sgpr8
+
+ renamable $vgpr5 = IMPLICIT_DEF
+ renamable $sgpr0_sgpr1 = IMPLICIT_DEF
+ renamable $sgpr2_sgpr3 = COPY $sgpr6_sgpr7
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x40000000), %bb.8(0x40000000)
+ liveins: $sgpr6, $sgpr8, $sgpr0_sgpr1, $sgpr2_sgpr3
+
+ $sgpr2 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+ $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
+ renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+ S_NOP 0, implicit $sgpr2_sgpr3
+ S_CBRANCH_EXECZ %bb.8, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.8(0x40000000)
+ liveins: $sgpr6
+
+ $m0 = COPY killed renamable $sgpr6
+ S_BRANCH %bb.8
+
+ bb.8:
+
+ S_ENDPGM 0
+
+...
More information about the llvm-commits
mailing list