[llvm] r367500 - Reapply "AMDGPU: Split block for si_end_cf"
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 31 18:25:27 PDT 2019
Author: arsenm
Date: Wed Jul 31 18:25:27 2019
New Revision: 367500
URL: http://llvm.org/viewvc/llvm-project?rev=367500&view=rev
Log:
Reapply "AMDGPU: Split block for si_end_cf"
This reverts commit r359363, reapplying r357634
Modified:
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=367500&r1=367499&r2=367500&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Wed Jul 31 18:25:27 2019
@@ -1397,6 +1397,12 @@ bool SIInstrInfo::expandPostRAPseudo(Mac
MI.setDesc(get(AMDGPU::S_OR_B32));
break;
+ case AMDGPU::S_OR_B64_term:
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(get(AMDGPU::S_OR_B64));
+ break;
+
case AMDGPU::S_ANDN2_B64_term:
// This is only a terminator to get the correct spill code placement during
// register allocation.
@@ -1889,6 +1895,7 @@ bool SIInstrInfo::analyzeBranch(MachineB
case AMDGPU::SI_MASK_BRANCH:
case AMDGPU::S_MOV_B64_term:
case AMDGPU::S_XOR_B64_term:
+ case AMDGPU::S_OR_B64_term:
case AMDGPU::S_ANDN2_B64_term:
case AMDGPU::S_MOV_B32_term:
case AMDGPU::S_XOR_B32_term:
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=367500&r1=367499&r2=367500&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Wed Jul 31 18:25:27 2019
@@ -193,6 +193,7 @@ class WrapTerminatorInst<SOP_Pseudo base
let WaveSizePredicate = isWave64 in {
def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
+def S_OR_B64_term : WrapTerminatorInst<S_OR_B64>;
def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp?rev=367500&r1=367499&r2=367500&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp Wed Jul 31 18:25:27 2019
@@ -55,6 +55,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -79,12 +80,16 @@ class SILowerControlFlow : public Machin
private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
- LiveIntervals *LIS = nullptr;
MachineRegisterInfo *MRI = nullptr;
+ LiveIntervals *LIS = nullptr;
+ MachineDominatorTree *DT = nullptr;
+ MachineLoopInfo *MLI = nullptr;
+
const TargetRegisterClass *BoolRC = nullptr;
unsigned AndOpc;
unsigned OrOpc;
+ unsigned OrTermOpc;
unsigned XorOpc;
unsigned MovTermOpc;
unsigned Andn2TermOpc;
@@ -121,7 +126,7 @@ public:
AU.addPreservedID(LiveVariablesID);
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
- AU.setPreservesCFG();
+
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -249,7 +254,7 @@ void SILowerControlFlow::emitIf(MachineI
LIS->InsertMachineInstrInMaps(*SetExec);
LIS->InsertMachineInstrInMaps(*NewBr);
- LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
+ LIS->removeAllRegUnitsForPhysReg(Exec);
MI.eraseFromParent();
// FIXME: Is there a better way of adjusting the liveness? It shouldn't be
@@ -333,7 +338,7 @@ void SILowerControlFlow::emitElse(Machin
LIS->createAndComputeVirtRegInterval(SaveReg);
// Let this be recomputed.
- LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
+ LIS->removeAllRegUnitsForPhysReg(Exec);
}
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
@@ -398,23 +403,99 @@ void SILowerControlFlow::emitLoop(Machin
MI.eraseFromParent();
}
+// Replace \p MI in \p MBB with the terminator \p NewMI (which modifies exec).
+// If \p MI is not last, \p MBB is split and the new block returned, else \p MBB.
+static MachineBasicBlock *insertInstWithExecFallthrough(MachineBasicBlock &MBB,
+ MachineInstr &MI,
+ MachineInstr *NewMI,
+ MachineDominatorTree *DT,
+ LiveIntervals *LIS,
+ MachineLoopInfo *MLI) {
+ assert(NewMI->isTerminator());
+
+ MachineBasicBlock::iterator InsPt = MI.getIterator();
+ if (std::next(MI.getIterator()) == MBB.end()) {
+ // MI is the last instruction: swap in NewMI without creating a new block.
+ MBB.insert(InsPt, NewMI);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
+ MI.eraseFromParent();
+ return &MBB;
+ }
+
+ MachineFunction *MF = MBB.getParent();
+ MachineBasicBlock *SplitMBB
+ = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ MF->insert(++MachineFunction::iterator(MBB), SplitMBB);
+
+ // FIXME: This is working around a MachineDominatorTree API defect.
+ //
+ // If a previous pass split a critical edge, it may not have been applied to
+ // the DomTree yet. applySplitCriticalEdges is lazily applied, and inspects
+ // the CFG of the given block. Make sure to call a dominator tree method that
+ // will flush this cache before touching the successors of the block.
+ MachineDomTreeNode *NodeMBB = nullptr;
+ if (DT)
+ NodeMBB = DT->getNode(&MBB);
+
+ // Move every instruction (MI included; it is erased below) to the new block.
+ SplitMBB->splice(SplitMBB->begin(), &MBB, MBB.begin(), MBB.end());
+
+ SplitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ MBB.addSuccessor(SplitMBB, BranchProbability::getOne());
+
+ MBB.insert(MBB.end(), NewMI);
+
+ if (DT) {
+ std::vector<MachineDomTreeNode *> Children = NodeMBB->getChildren();
+ DT->addNewBlock(SplitMBB, &MBB);
+
+ // Reparent all of the children to the new block body.
+ auto *SplitNode = DT->getNode(SplitMBB);
+ for (auto *Child : Children)
+ DT->changeImmediateDominator(Child, SplitNode);
+ }
+
+ if (MLI) {
+ if (MachineLoop *Loop = MLI->getLoopFor(&MBB))
+ Loop->addBasicBlockToLoop(SplitMBB, MLI->getBase());
+ }
+
+ if (LIS) {
+ LIS->insertMBBInMaps(SplitMBB);
+ LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
+ }
+
+ // All live-ins are forwarded.
+ for (auto &LiveIn : MBB.liveins())
+ SplitMBB->addLiveIn(LiveIn);
+
+ MI.eraseFromParent();
+ return SplitMBB;
+}
+
void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator InsPt = MBB.begin();
- MachineInstr *NewMI =
- BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
- .addReg(Exec)
- .add(MI.getOperand(0));
+ // First, move the instruction. It's unnecessarily difficult to update
+ // LiveIntervals when there's a change in control flow, so move the
+ // instruction before changing the blocks.
+ MBB.splice(InsPt, &MBB, MI.getIterator());
if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
+ LIS->handleMove(MI);
- MI.eraseFromParent();
+ MachineFunction *MF = MBB.getParent();
- if (LIS)
- LIS->handleMove(*NewMI);
+ // Create instruction without inserting it yet.
+ MachineInstr *NewMI
+ = BuildMI(*MF, DL, TII->get(OrTermOpc), Exec)
+ .addReg(Exec)
+ .add(MI.getOperand(0));
+ insertInstWithExecFallthrough(MBB, MI, NewMI, DT, LIS, MLI);
}
// Returns replace operands for a logical operation, either single result
@@ -436,7 +517,7 @@ void SILowerControlFlow::findMaskOperand
// A copy with implcitly defined exec inserted earlier is an exclusion, it
// does not really modify exec.
for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
- if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
+ if (I->modifiesRegister(Exec, TRI) &&
!(I->isCopy() && I->getOperand(0).getReg() != Exec))
return;
@@ -480,12 +561,16 @@ bool SILowerControlFlow::runOnMachineFun
// This doesn't actually need LiveIntervals, but we can preserve them.
LIS = getAnalysisIfAvailable<LiveIntervals>();
+ DT = getAnalysisIfAvailable<MachineDominatorTree>();
+ MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+
MRI = &MF.getRegInfo();
BoolRC = TRI->getBoolRC();
if (ST.isWave32()) {
AndOpc = AMDGPU::S_AND_B32;
OrOpc = AMDGPU::S_OR_B32;
+ OrTermOpc = AMDGPU::S_OR_B32_term;
XorOpc = AMDGPU::S_XOR_B32;
MovTermOpc = AMDGPU::S_MOV_B32_term;
Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
@@ -495,6 +580,7 @@ bool SILowerControlFlow::runOnMachineFun
} else {
AndOpc = AMDGPU::S_AND_B64;
OrOpc = AMDGPU::S_OR_B64;
+ OrTermOpc = AMDGPU::S_OR_B64_term;
XorOpc = AMDGPU::S_XOR_B64;
MovTermOpc = AMDGPU::S_MOV_B64_term;
Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
@@ -507,11 +593,11 @@ bool SILowerControlFlow::runOnMachineFun
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; BI = NextBB) {
NextBB = std::next(BI);
- MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock *MBB = &*BI;
MachineBasicBlock::iterator I, Next, Last;
- for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) {
+ for (I = MBB->begin(), Last = MBB->end(); I != MBB->end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
@@ -532,10 +618,24 @@ bool SILowerControlFlow::runOnMachineFun
emitLoop(MI);
break;
- case AMDGPU::SI_END_CF:
+ case AMDGPU::SI_END_CF: {
+ MachineInstr *NextMI = nullptr;
+
+ if (Next != MBB->end())
+ NextMI = &*Next;
+
emitEndCf(MI);
- break;
+ if (NextMI) {
+ MBB = NextMI->getParent();
+ Next = NextMI->getIterator();
+ Last = MBB->end();
+ }
+
+ NextBB = std::next(MBB->getIterator());
+ BE = MF.end();
+ break;
+ }
case AMDGPU::S_AND_B64:
case AMDGPU::S_OR_B64:
case AMDGPU::S_AND_B32:
@@ -551,7 +651,7 @@ bool SILowerControlFlow::runOnMachineFun
}
// Replay newly inserted code to combine masks
- Next = (Last == MBB.end()) ? MBB.begin() : Last;
+ Next = (Last == MBB->end()) ? MBB->begin() : Last;
}
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp?rev=367500&r1=367499&r2=367500&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp Wed Jul 31 18:25:27 2019
@@ -202,6 +202,12 @@ static bool removeTerminatorBit(const SI
MI.setDesc(TII.get(AMDGPU::S_OR_B32));
return true;
}
+ case AMDGPU::S_OR_B64_term: {
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(TII.get(AMDGPU::S_OR_B64));
+ return true;
+ }
case AMDGPU::S_ANDN2_B64_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
Modified: llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp?rev=367500&r1=367499&r2=367500&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp Wed Jul 31 18:25:27 2019
@@ -82,14 +82,14 @@ FunctionPass *llvm::createSIOptimizeExec
return new SIOptimizeExecMaskingPreRA();
}
-static bool isEndCF(const MachineInstr &MI, const SIRegisterInfo *TRI,
- const GCNSubtarget &ST) {
+static bool isEndCF(const MachineInstr &MI, const GCNSubtarget &ST,
+ const SIRegisterInfo *TRI) {
if (ST.isWave32()) {
- return MI.getOpcode() == AMDGPU::S_OR_B32 &&
+ return MI.getOpcode() == AMDGPU::S_OR_B32_term &&
MI.modifiesRegister(AMDGPU::EXEC_LO, TRI);
}
- return MI.getOpcode() == AMDGPU::S_OR_B64 &&
+ return MI.getOpcode() == AMDGPU::S_OR_B64_term &&
MI.modifiesRegister(AMDGPU::EXEC, TRI);
}
@@ -380,13 +380,13 @@ bool SIOptimizeExecMaskingPreRA::runOnMa
// Try to collapse adjacent endifs.
auto E = MBB.end();
- auto Lead = skipDebugInstructionsForward(MBB.begin(), E);
- if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI, ST))
+ auto Lead = MBB.getFirstTerminator();
+ if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, ST, TRI))
continue;
MachineBasicBlock *TmpMBB = &MBB;
auto NextLead = skipIgnoreExecInstsTrivialSucc(TmpMBB, std::next(Lead));
- if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, TRI, ST) ||
+ if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, ST, TRI) ||
!getOrExecSource(*NextLead, *TII, MRI, ST))
continue;
Modified: llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir?rev=367500&r1=367499&r2=367500&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir Wed Jul 31 18:25:27 2019
@@ -49,8 +49,10 @@ body: |
; GCN: successors: %bb.4(0x80000000)
; GCN: DBG_VALUE
; GCN: bb.4:
+ ; GCN: successors: %bb.5(0x80000000)
; GCN: DBG_VALUE
- ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
+ ; GCN: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc
+ ; GCN: bb.5:
; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN: $m0 = S_MOV_B32 -1
@@ -95,12 +97,14 @@ body: |
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
- $exec = S_OR_B64 $exec, %12, implicit-def $scc
DBG_VALUE
+ $exec = S_OR_B64_term $exec, %12, implicit-def $scc
bb.4:
DBG_VALUE
- $exec = S_OR_B64 $exec, %3, implicit-def $scc
+ $exec = S_OR_B64_term $exec, %3, implicit-def $scc
+
+ bb.5:
%15:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
%16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
$m0 = S_MOV_B32 -1
@@ -121,7 +125,7 @@ machineFunctionInfo:
body: |
; GCN-LABEL: name: simple_nested_if_empty_block_between
; GCN: bb.0:
- ; GCN: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GCN: successors: %bb.1(0x40000000), %bb.4(0x40000000)
; GCN: liveins: $vgpr0, $sgpr0_sgpr1
; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -129,7 +133,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]]
- ; GCN: SI_MASK_BRANCH %bb.5, implicit $exec
+ ; GCN: SI_MASK_BRANCH %bb.4, implicit $exec
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -158,7 +162,9 @@ body: |
; GCN: bb.4:
; GCN: successors: %bb.5(0x80000000)
; GCN: bb.5:
- ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
+ ; GCN: successors: %bb.6(0x80000000)
+ ; GCN: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc
+ ; GCN: bb.6:
; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN: $m0 = S_MOV_B32 -1
@@ -203,12 +209,14 @@ body: |
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
- $exec = S_OR_B64 $exec, %12, implicit-def $scc
+ $exec = S_OR_B64_term $exec, %12, implicit-def $scc
+
+ bb.4:
bb.5:
+ $exec = S_OR_B64_term $exec, %3, implicit-def $scc
- bb.4:
- $exec = S_OR_B64 $exec, %3, implicit-def $scc
+ bb.6:
%15:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
%16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
$m0 = S_MOV_B32 -1
@@ -229,7 +237,7 @@ machineFunctionInfo:
body: |
; GCN-LABEL: name: simple_nested_if_empty_block_dbg_between
; GCN: bb.0:
- ; GCN: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GCN: successors: %bb.1(0x40000000), %bb.4(0x40000000)
; GCN: liveins: $vgpr0, $sgpr0_sgpr1
; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -267,7 +275,9 @@ body: |
; GCN: successors: %bb.5(0x80000000)
; GCN: DBG_VALUE
; GCN: bb.5:
- ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
+ ; GCN: successors: %bb.6(0x80000000)
+ ; GCN: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc
+ ; GCN: bb.6:
; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN: $m0 = S_MOV_B32 -1
@@ -283,7 +293,7 @@ body: |
%3:sreg_64 = COPY $exec, implicit-def $exec
%4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc
$exec = S_MOV_B64_term %4
- SI_MASK_BRANCH %bb.4, implicit $exec
+ SI_MASK_BRANCH %bb.5, implicit $exec
S_BRANCH %bb.1
bb.1:
@@ -312,13 +322,15 @@ body: |
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
- $exec = S_OR_B64 $exec, %12, implicit-def $scc
+ $exec = S_OR_B64_term $exec, %12, implicit-def $scc
- bb.5:
+ bb.4:
DBG_VALUE
- bb.4:
- $exec = S_OR_B64 $exec, %3, implicit-def $scc
+ bb.5:
+ $exec = S_OR_B64_term $exec, %3, implicit-def $scc
+
+ bb.6:
%15:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
%16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
$m0 = S_MOV_B32 -1
@@ -360,8 +372,7 @@ body: |
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
- ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
- ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
+ ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
; GCN: SI_MASK_BRANCH %bb.3, implicit $exec
; GCN: S_BRANCH %bb.2
@@ -376,9 +387,10 @@ body: |
; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; GCN: dead %16:sgpr_32 = S_BREV_B32 [[DEF]]
; GCN: KILL [[DEF]]
- ; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
; GCN: bb.4:
- ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
+ ; GCN: successors: %bb.5(0x80000000)
+ ; GCN: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc
+ ; GCN: bb.5:
; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN: $m0 = S_MOV_B32 -1
@@ -426,10 +438,12 @@ body: |
%15:sgpr_32 = IMPLICIT_DEF
%16:sgpr_32 = S_BREV_B32 %15
KILL %15
- $exec = S_OR_B64 $exec, %12, implicit-def $scc
+ $exec = S_OR_B64_term $exec, %12, implicit-def $scc
bb.4:
- $exec = S_OR_B64 $exec, %3, implicit-def $scc
+ $exec = S_OR_B64_term $exec, %3, implicit-def $scc
+
+ bb.5:
%17:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
%18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
$m0 = S_MOV_B32 -1
@@ -475,7 +489,7 @@ body: |
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
- ; GCN: SI_MASK_BRANCH %bb.3, implicit $exec
+ ; GCN: SI_MASK_BRANCH %bb.4, implicit $exec
; GCN: S_BRANCH %bb.2
; GCN: bb.2:
; GCN: successors: %bb.3(0x80000000)
@@ -485,12 +499,16 @@ body: |
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
+ ; GCN: bb.4:
+ ; GCN: successors: %bb.5(0x80000000)
; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; GCN: [[S_BREV_B32_:%[0-9]+]]:sgpr_32 = S_BREV_B32 [[DEF]]
; GCN: KILL [[DEF]]
; GCN: dead %17:sgpr_32 = COPY [[S_BREV_B32_]]
- ; GCN: bb.4:
- ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
+ ; GCN: bb.5:
+ ; GCN: successors: %bb.6(0x80000000)
+ ; GCN: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc
+ ; GCN: bb.6:
; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN: $m0 = S_MOV_B32 -1
@@ -525,7 +543,7 @@ body: |
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
$exec = S_MOV_B64_term %13
- SI_MASK_BRANCH %bb.3, implicit $exec
+ SI_MASK_BRANCH %bb.4, implicit $exec
S_BRANCH %bb.2
bb.2:
@@ -535,14 +553,18 @@ body: |
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
- $exec = S_OR_B64 $exec, %12, implicit-def $scc
+ $exec = S_OR_B64_term $exec, %12, implicit-def $scc
+
+ bb.4:
%15:sgpr_32 = IMPLICIT_DEF
%16:sgpr_32 = S_BREV_B32 %15
KILL %15
%19:sgpr_32 = COPY %16
- bb.4:
- $exec = S_OR_B64 $exec, %3, implicit-def $scc
+ bb.5:
+ $exec = S_OR_B64_term $exec, %3, implicit-def $scc
+
+ bb.6:
%17:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
%18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
$m0 = S_MOV_B32 -1
@@ -598,10 +620,14 @@ body: |
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
- ; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
- ; GCN: dead %15:sreg_64 = S_BREV_B64 $exec
+ ; GCN: $exec = S_OR_B64_term $exec, [[COPY4]], implicit-def $scc
; GCN: bb.4:
- ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
+ ; GCN: successors: %bb.5(0x80000000)
+ ; GCN: dead %15:sreg_64 = S_BREV_B64 $exec
+ ; GCN: bb.5:
+ ; GCN: successors: %bb.6(0x80000000)
+ ; GCN: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc
+ ; GCN: bb.6:
; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN: $m0 = S_MOV_B32 -1
@@ -646,11 +672,15 @@ body: |
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
- $exec = S_OR_B64 $exec, %12, implicit-def $scc
- %15:sreg_64 = S_BREV_B64 $exec
+ $exec = S_OR_B64_term $exec, %12, implicit-def $scc
bb.4:
- $exec = S_OR_B64 $exec, %3, implicit-def $scc
+ %15:sreg_64 = S_BREV_B64 $exec
+
+ bb.5:
+ $exec = S_OR_B64_term $exec, %3, implicit-def $scc
+
+ bb.6:
%17:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
%18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
$m0 = S_MOV_B32 -1
@@ -671,7 +701,7 @@ machineFunctionInfo:
body: |
; GCN-LABEL: name: copy_no_explicit_exec_dependency
; GCN: bb.0:
- ; GCN: successors: %bb.1(0x40000000), %bb.4(0x40000000)
+ ; GCN: successors: %bb.1(0x40000000), %bb.5(0x40000000)
; GCN: liveins: $vgpr0, $sgpr0_sgpr1
; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -679,7 +709,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]]
- ; GCN: SI_MASK_BRANCH %bb.4, implicit $exec
+ ; GCN: SI_MASK_BRANCH %bb.5, implicit $exec
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -706,17 +736,21 @@ body: |
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
- ; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
- ; GCN: dead %15:vgpr_32 = COPY %5.sub2
+ ; GCN: $exec = S_OR_B64_term $exec, [[COPY4]], implicit-def $scc
; GCN: bb.4:
- ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
+ ; GCN: successors: %bb.5(0x80000000)
+ ; GCN: dead %15:vgpr_32 = COPY %5.sub2
+ ; GCN: bb.5:
+ ; GCN: successors: %bb.6(0x80000000)
+ ; GCN: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc
+ ; GCN: bb.6:
; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN: $m0 = S_MOV_B32 -1
; GCN: DS_WRITE_B32 [[V_MOV_B32_e32_2]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store 4, addrspace 3)
; GCN: S_ENDPGM 0
bb.0:
- successors: %bb.1, %bb.4
+ successors: %bb.1, %bb.5
liveins: $vgpr0, $sgpr0_sgpr1
%1:sgpr_64 = COPY $sgpr0_sgpr1
@@ -725,7 +759,7 @@ body: |
%3:sreg_64 = COPY $exec, implicit-def $exec
%4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc
$exec = S_MOV_B64_term %4
- SI_MASK_BRANCH %bb.4, implicit $exec
+ SI_MASK_BRANCH %bb.5, implicit $exec
S_BRANCH %bb.1
bb.1:
@@ -754,11 +788,15 @@ body: |
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
- $exec = S_OR_B64 $exec, %12, implicit-def $scc
- %15:vgpr_32 = COPY %5.sub2
+ $exec = S_OR_B64_term $exec, %12, implicit-def $scc
bb.4:
- $exec = S_OR_B64 $exec, %3, implicit-def $scc
+ %15:vgpr_32 = COPY %5.sub2
+
+ bb.5:
+ $exec = S_OR_B64_term $exec, %3, implicit-def $scc
+
+ bb.6:
%17:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
%18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
$m0 = S_MOV_B32 -1
@@ -813,17 +851,19 @@ body: |
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
- ; GCN: successors: %bb.5(0x80000000)
- ; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
- ; GCN: S_BRANCH %bb.5
+ ; GCN: successors: %bb.6(0x80000000)
+ ; GCN: $exec = S_OR_B64_term $exec, [[COPY4]], implicit-def $scc
+ ; GCN: S_BRANCH %bb.6
; GCN: bb.4:
- ; GCN: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
+ ; GCN: successors: %bb.5(0x80000000)
+ ; GCN: $exec = S_OR_B64_term $exec, [[COPY2]], implicit-def $scc
+ ; GCN: bb.5:
; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN: $m0 = S_MOV_B32 -1
; GCN: DS_WRITE_B32 [[V_MOV_B32_e32_2]], [[V_MOV_B32_e32_1]], 0, 0, implicit $m0, implicit $exec :: (store 4, addrspace 3)
; GCN: S_ENDPGM 0
- ; GCN: bb.5:
+ ; GCN: bb.6:
; GCN: successors: %bb.4(0x80000000)
; GCN: S_BRANCH %bb.4
bb.0:
@@ -865,18 +905,20 @@ body: |
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
- $exec = S_OR_B64 $exec, %12, implicit-def $scc
- S_BRANCH %bb.5
+ $exec = S_OR_B64_term $exec, %12, implicit-def $scc
+ S_BRANCH %bb.6
bb.4:
- $exec = S_OR_B64 $exec, %3, implicit-def $scc
+ $exec = S_OR_B64_term $exec, %3, implicit-def $scc
+
+ bb.5:
%15:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
%16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
$m0 = S_MOV_B32 -1
DS_WRITE_B32 %16, %15, 0, 0, implicit $m0, implicit $exec :: (store 4, addrspace 3)
S_ENDPGM 0
- bb.5:
+ bb.6:
S_BRANCH %bb.4
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll?rev=367500&r1=367499&r2=367500&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll Wed Jul 31 18:25:27 2019
@@ -21,7 +21,7 @@ define amdgpu_cs float @ds_ordered_swap(
; GCN: s_cbranch_execz [[BB:BB._.]]
; GCN: s_mov_b32 m0, s0
; VIGFX9-NEXT: s_nop 0
-; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
+; GCN-NEXT: ds_ordered_count v1, v0 offset:4868 gds
; GCN-NEXT: [[BB]]:
; // Wait for expcnt(0) before modifying EXEC
; GCN-NEXT: s_waitcnt expcnt(0)
More information about the llvm-commits
mailing list