[clang] [llvm] [AMDGPU] Change CF intrinsics lowering to reconverge on predecessors. (PR #92809)
via cfe-commits
cfe-commits at lists.llvm.org
Mon May 20 12:11:13 PDT 2024
github-actions[bot] wrote:
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff 586ecd75606e70a8d16cb1717809acce652ffe7f 7cda2e3ce0d180688250856566b6c75ca07d7711 -- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp llvm/lib/Target/AMDGPU/SIISelLowering.cpp llvm/lib/Target/AMDGPU/SIInstrInfo.cpp llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
``````````
</details>
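If the suggestions look right, a minimal sketch for applying them locally (assuming the PR branch is checked out and the first hash above is the merge base) is to rerun git-clang-format with just that base commit; without `--diff` it rewrites the offending lines in the working tree instead of printing a patch:

``````````bash
# With the PR branch checked out, format only the lines that changed
# relative to the merge base (the first hash from the command above).
# Omitting --diff applies the edits in place rather than printing them.
git-clang-format 586ecd75606e70a8d16cb1717809acce652ffe7f

# Review the formatting-only changes, then commit them.
git diff
git commit -am "Apply clang-format suggestions"
``````````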
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 368cc98b9a..97c9e9a32b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -950,19 +950,16 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
Register LoopMask = MRI.createVirtualRegister(
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
- B.buildInstr(XorTermOpc)
- .addDef(LoopMask)
- .addReg(ExecReg)
- .addReg(NewExec);
+ B.buildInstr(XorTermOpc).addDef(LoopMask).addReg(ExecReg).addReg(NewExec);
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
// s_cbranch_scc0?
// Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
B.buildInstr(AMDGPU::SI_WATERFALL_LOOP)
- .addReg(LoopMask)
- .addReg(NewExec)
- .addMBB(LoopBB);
+ .addReg(LoopMask)
+ .addReg(NewExec)
+ .addMBB(LoopBB);
// Save the EXEC mask before the loop.
BuildMI(MBB, MBB.end(), DL, TII->get(MovExecOpc), SaveExecReg)
diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 68d81a6ffa..8e909e5afb 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -13,9 +13,9 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -142,7 +142,8 @@ void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break,
{ IntMask });
Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop, { IntMask });
- WaveReconverge = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_wave_reconverge, { IntMask });
+ WaveReconverge = Intrinsic::getDeclaration(
+ &M, Intrinsic::amdgcn_wave_reconverge, {IntMask});
}
/// Is the branch condition uniform or did the StructurizeCFG pass
@@ -331,14 +332,14 @@ bool SIAnnotateControlFlow::tryWaveReconverge(BasicBlock *BB) {
for (auto Succ : Term->successors()) {
if (isTopOfStack(Succ)) {
// Just split to make a room for further WAVE_RECONVERGE insertion
- SmallVector<BasicBlock*, 2> Preds;
+ SmallVector<BasicBlock *, 2> Preds;
for (auto P : predecessors(Succ)) {
if (DT->dominates(BB, P))
Preds.push_back(P);
}
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- SplitBlockPredecessors(Succ, Preds, ".reconverge", &DTU, LI,
- nullptr, false);
+ SplitBlockPredecessors(Succ, Preds, ".reconverge", &DTU, LI, nullptr,
+ false);
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ea1e7c782e..b3984d4124 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9941,8 +9941,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return SDValue(Load, 0);
}
case Intrinsic::amdgcn_wave_reconverge:
- return SDValue(DAG.getMachineNode(AMDGPU::SI_WAVE_RECONVERGE, DL, MVT::Other,
- Op->getOperand(2), Chain), 0);
+ return SDValue(DAG.getMachineNode(AMDGPU::SI_WAVE_RECONVERGE, DL,
+ MVT::Other, Op->getOperand(2), Chain),
+ 0);
case Intrinsic::amdgcn_s_barrier_init:
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier: {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 3412846a5a..9786a382f6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6538,7 +6538,8 @@ loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
// BuildMI(*BodyBB, BodyBB->end(), DL, TII.get(AMDGPU::S_BRANCH))
// .addMBB(RemainderBB);
// Restore the EXEC mask
- // BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec);
+ // BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc),
+ // Exec).addReg(SaveExec);
return BodyBB;
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 99ecff2d95..c494897392 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -216,9 +216,8 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
// Get rid of the garbage bits in the Cond register which might be coming from
// the bitwise arithmetic when one of the expression operands is coming from
// the outer scope and hence having extra bits set.
- MachineInstr *CondFiltered = BuildMI(MBB, I, DL, TII->get(AndOpc), MaskThen)
- .add(Cond)
- .addReg(Exec);
+ MachineInstr *CondFiltered =
+ BuildMI(MBB, I, DL, TII->get(AndOpc), MaskThen).add(Cond).addReg(Exec);
if (LV)
LV->replaceKillInstruction(CondReg, MI, *CondFiltered);
@@ -306,9 +305,9 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
.addReg(MaskLoop)
.addImm(TestMask);
- MachineInstr *SetExec= BuildMI(MBB, &MI, DL, TII->get(Select), Exec)
- .addReg(MaskLoop)
- .addReg(Cond);
+ MachineInstr *SetExec = BuildMI(MBB, &MI, DL, TII->get(Select), Exec)
+ .addReg(MaskLoop)
+ .addReg(Cond);
if (LV)
LV->replaceKillInstruction(MI.getOperand(0).getReg(), MI, *SetExec);
@@ -341,15 +340,17 @@ void SILowerControlFlow::emitWaterfallLoop(MachineInstr &MI) {
Register AndZero = MRI->createVirtualRegister(
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
- MachineInstr *MaskZeroTest = BuildMI(*BodyBB, I, DL, TII->get(AndTermOpc), AndZero)
- .addReg(LoopMask)
- .addImm(TestMask);
+ MachineInstr *MaskZeroTest =
+ BuildMI(*BodyBB, I, DL, TII->get(AndTermOpc), AndZero)
+ .addReg(LoopMask)
+ .addImm(TestMask);
MachineInstr *UpdateExec = BuildMI(*BodyBB, I, DL, TII->get(Select), Exec)
- .addReg(LoopMask)
- .addReg(ExitMask);
+ .addReg(LoopMask)
+ .addReg(ExitMask);
- MachineInstr *Branch = BuildMI(*BodyBB, I, DL, TII->get(AMDGPU::S_CBRANCH_SCC1)).addMBB(LoopBB);
+ MachineInstr *Branch =
+ BuildMI(*BodyBB, I, DL, TII->get(AMDGPU::S_CBRANCH_SCC1)).addMBB(LoopBB);
if (LIS) {
RecomputeRegs.insert(MI.getOperand(0).getReg());
@@ -405,7 +406,7 @@ void SILowerControlFlow::emitWaveDiverge(MachineInstr &MI,
MachineInstr *CopyExec =
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DisableLanesMask)
.addReg(Exec);
- if(LIS)
+ if (LIS)
LIS->InsertMachineInstrInMaps(*CopyExec);
}
Register TestResultReg = MRI->createVirtualRegister(BoolRC);
@@ -463,7 +464,7 @@ void SILowerControlFlow::emitWaveDiverge(MachineInstr &MI,
LIS->removeAllRegUnitsForPhysReg(Exec);
}
-void SILowerControlFlow::emitWaveReconverge(MachineInstr &MI) {
+void SILowerControlFlow::emitWaveReconverge(MachineInstr &MI) {
MachineBasicBlock &BB = *MI.getParent();
Register Mask = MI.getOperand(0).getReg();
``````````
</details>
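Alternatively (a workflow assumption, not something the bot output states), the patch above can be saved to a file and applied directly, since clang-format emits a standard git-style diff:

``````````bash
# Hypothetical file name: save the diff above as clang-format.patch at the
# repository root, then apply it to the working tree and commit.
git apply clang-format.patch
git commit -am "Apply clang-format suggestions"
``````````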
https://github.com/llvm/llvm-project/pull/92809