[clang] [llvm] [AMDGPU] Change CF intrinsics lowering to reconverge on predecessors. (PR #92809)
via cfe-commits
cfe-commits at lists.llvm.org
Mon May 20 12:11:13 PDT 2024
github-actions[bot] wrote:
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff 586ecd75606e70a8d16cb1717809acce652ffe7f 7cda2e3ce0d180688250856566b6c75ca07d7711 -- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp llvm/lib/Target/AMDGPU/SIISelLowering.cpp llvm/lib/Target/AMDGPU/SIInstrInfo.cpp llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
``````````
</details>
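If the suggestions look right, a minimal sketch for applying them locally (assuming the PR branch is checked out and the first hash above is the merge base) is to rerun git-clang-format with just that base commit; without `--diff` it rewrites the offending lines in the working tree instead of printing a patch:

``````````bash
# With the PR branch checked out, format only the lines that changed
# relative to the merge base (the first hash from the command above).
# Omitting --diff applies the edits in place rather than printing them.
git-clang-format 586ecd75606e70a8d16cb1717809acce652ffe7f

# Review the formatting-only changes, then commit them.
git diff
git commit -am "Apply clang-format suggestions"
``````````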
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 368cc98b9a..97c9e9a32b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -950,19 +950,16 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
Register LoopMask = MRI.createVirtualRegister(
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
- B.buildInstr(XorTermOpc)
- .addDef(LoopMask)
- .addReg(ExecReg)
- .addReg(NewExec);
+ B.buildInstr(XorTermOpc).addDef(LoopMask).addReg(ExecReg).addReg(NewExec);
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
// s_cbranch_scc0?
// Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
B.buildInstr(AMDGPU::SI_WATERFALL_LOOP)
- .addReg(LoopMask)
- .addReg(NewExec)
- .addMBB(LoopBB);
+ .addReg(LoopMask)
+ .addReg(NewExec)
+ .addMBB(LoopBB);
// Save the EXEC mask before the loop.
BuildMI(MBB, MBB.end(), DL, TII->get(MovExecOpc), SaveExecReg)
diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 68d81a6ffa..8e909e5afb 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -13,9 +13,9 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -142,7 +142,8 @@ void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break,
{ IntMask });
Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop, { IntMask });
- WaveReconverge = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_wave_reconverge, { IntMask });
+ WaveReconverge = Intrinsic::getDeclaration(
+ &M, Intrinsic::amdgcn_wave_reconverge, {IntMask});
}
/// Is the branch condition uniform or did the StructurizeCFG pass
@@ -331,14 +332,14 @@ bool SIAnnotateControlFlow::tryWaveReconverge(BasicBlock *BB) {
for (auto Succ : Term->successors()) {
if (isTopOfStack(Succ)) {
// Just split to make a room for further WAVE_RECONVERGE insertion
- SmallVector<BasicBlock*, 2> Preds;
+ SmallVector<BasicBlock *, 2> Preds;
for (auto P : predecessors(Succ)) {
if (DT->dominates(BB, P))
Preds.push_back(P);
}
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- SplitBlockPredecessors(Succ, Preds, ".reconverge", &DTU, LI,
- nullptr, false);
+ SplitBlockPredecessors(Succ, Preds, ".reconverge", &DTU, LI, nullptr,
+ false);
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ea1e7c782e..b3984d4124 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9941,8 +9941,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return SDValue(Load, 0);
}
case Intrinsic::amdgcn_wave_reconverge:
- return SDValue(DAG.getMachineNode(AMDGPU::SI_WAVE_RECONVERGE, DL, MVT::Other,
- Op->getOperand(2), Chain), 0);
+ return SDValue(DAG.getMachineNode(AMDGPU::SI_WAVE_RECONVERGE, DL,
+ MVT::Other, Op->getOperand(2), Chain),
+ 0);
case Intrinsic::amdgcn_s_barrier_init:
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier: {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 3412846a5a..9786a382f6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6538,7 +6538,8 @@ loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
// BuildMI(*BodyBB, BodyBB->end(), DL, TII.get(AMDGPU::S_BRANCH))
// .addMBB(RemainderBB);
// Restore the EXEC mask
- // BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec);
+ // BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc),
+ // Exec).addReg(SaveExec);
return BodyBB;
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 99ecff2d95..c494897392 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -216,9 +216,8 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
// Get rid of the garbage bits in the Cond register which might be coming from
// the bitwise arithmetic when one of the expression operands is coming from
// the outer scope and hence having extra bits set.
- MachineInstr *CondFiltered = BuildMI(MBB, I, DL, TII->get(AndOpc), MaskThen)
- .add(Cond)
- .addReg(Exec);
+ MachineInstr *CondFiltered =
+ BuildMI(MBB, I, DL, TII->get(AndOpc), MaskThen).add(Cond).addReg(Exec);
if (LV)
LV->replaceKillInstruction(CondReg, MI, *CondFiltered);
@@ -306,9 +305,9 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
.addReg(MaskLoop)
.addImm(TestMask);
- MachineInstr *SetExec= BuildMI(MBB, &MI, DL, TII->get(Select), Exec)
- .addReg(MaskLoop)
- .addReg(Cond);
+ MachineInstr *SetExec = BuildMI(MBB, &MI, DL, TII->get(Select), Exec)
+ .addReg(MaskLoop)
+ .addReg(Cond);
if (LV)
LV->replaceKillInstruction(MI.getOperand(0).getReg(), MI, *SetExec);
@@ -341,15 +340,17 @@ void SILowerControlFlow::emitWaterfallLoop(MachineInstr &MI) {
Register AndZero = MRI->createVirtualRegister(
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
- MachineInstr *MaskZeroTest = BuildMI(*BodyBB, I, DL, TII->get(AndTermOpc), AndZero)
- .addReg(LoopMask)
- .addImm(TestMask);
+ MachineInstr *MaskZeroTest =
+ BuildMI(*BodyBB, I, DL, TII->get(AndTermOpc), AndZero)
+ .addReg(LoopMask)
+ .addImm(TestMask);
MachineInstr *UpdateExec = BuildMI(*BodyBB, I, DL, TII->get(Select), Exec)
- .addReg(LoopMask)
- .addReg(ExitMask);
+ .addReg(LoopMask)
+ .addReg(ExitMask);
- MachineInstr *Branch = BuildMI(*BodyBB, I, DL, TII->get(AMDGPU::S_CBRANCH_SCC1)).addMBB(LoopBB);
+ MachineInstr *Branch =
+ BuildMI(*BodyBB, I, DL, TII->get(AMDGPU::S_CBRANCH_SCC1)).addMBB(LoopBB);
if (LIS) {
RecomputeRegs.insert(MI.getOperand(0).getReg());
@@ -405,7 +406,7 @@ void SILowerControlFlow::emitWaveDiverge(MachineInstr &MI,
MachineInstr *CopyExec =
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DisableLanesMask)
.addReg(Exec);
- if(LIS)
+ if (LIS)
LIS->InsertMachineInstrInMaps(*CopyExec);
}
Register TestResultReg = MRI->createVirtualRegister(BoolRC);
@@ -463,7 +464,7 @@ void SILowerControlFlow::emitWaveDiverge(MachineInstr &MI,
LIS->removeAllRegUnitsForPhysReg(Exec);
}
-void SILowerControlFlow::emitWaveReconverge(MachineInstr &MI) {
+void SILowerControlFlow::emitWaveReconverge(MachineInstr &MI) {
MachineBasicBlock &BB = *MI.getParent();
Register Mask = MI.getOperand(0).getReg();
``````````
</details>
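Alternatively (a workflow assumption, not something the bot output states), the patch above can be saved to a file and applied directly, since clang-format emits a standard git-style diff:

``````````bash
# Hypothetical file name: save the diff above as clang-format.patch at the
# repository root, then apply it to the working tree and commit.
git apply clang-format.patch
git commit -am "Apply clang-format suggestions"
``````````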
https://github.com/llvm/llvm-project/pull/92809