[llvm] [AMDGPU] Introduce conditional barrier pseudo instruction (PR #171604)

Wed Dec 10 04:19:34 PST 2025

github-actions[bot] wrote:




:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff origin/main HEAD --extensions h,cpp -- llvm/lib/Target/AMDGPU/AMDGPUExpandCondBarrier.cpp llvm/lib/Target/AMDGPU/AMDGPU.h llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --diff_from_common_commit
``````````

:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:

</details>

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExpandCondBarrier.cpp b/llvm/lib/Target/AMDGPU/AMDGPUExpandCondBarrier.cpp
index 6bba1fce6..7432d93d0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUExpandCondBarrier.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUExpandCondBarrier.cpp
@@ -15,9 +15,9 @@
 #include "GCNSubtarget.h"
 #include "SIInstrInfo.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/InitializePasses.h"
 
 using namespace llvm;
@@ -55,7 +55,8 @@ INITIALIZE_PASS(AMDGPUExpandCondBarrier, DEBUG_TYPE,
 bool AMDGPUExpandCondBarrier::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
 
-  // Collect all SI_COND_BARRIER instructions first to avoid iterator invalidation.
+  // Collect all SI_COND_BARRIER instructions first to avoid iterator
+  // invalidation.
   SmallVector<MachineInstr *, 4> CondBarriers;
 
   for (auto &MBB : MF) {
@@ -83,7 +84,8 @@ bool AMDGPUExpandCondBarrier::expandCondBarrier(MachineBasicBlock &MBB,
   DebugLoc DL = MI.getDebugLoc();
 
   // Get the variant that determines barrier execution condition.
-  // This allows complementary thread groups to synchronize on opposite conditions.
+  // This allows complementary thread groups to synchronize on opposite
+  // conditions.
   unsigned Variant = MI.getOperand(0).getImm();
 
   // Split current block only if there are instructions after MI.
@@ -109,12 +111,14 @@ bool AMDGPUExpandCondBarrier::expandCondBarrier(MachineBasicBlock &MBB,
   MF->insert(MBBI, BarrierMBB);
 
   // 1. Conditional branch to skip barrier based on variant:
-  //    Variant 0: Execute barrier when SCC=1, skip when SCC=0 (use S_CBRANCH_SCC0).
-  //    Variant 1: Execute barrier when SCC=0, skip when SCC=1 (use S_CBRANCH_SCC1).
-  unsigned BranchOpcode = (Variant == 0) ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1;
-  BuildMI(MBB, &MI, DL, TII->get(BranchOpcode))
-      .addMBB(ContinueMBB);
-  LLVM_DEBUG(dbgs() << "ExpandCondBarrier: Variant " << Variant << " expansion\n");
+  //    Variant 0: Execute barrier when SCC=1, skip when SCC=0 (use
+  //    S_CBRANCH_SCC0). Variant 1: Execute barrier when SCC=0, skip when SCC=1
+  //    (use S_CBRANCH_SCC1).
+  unsigned BranchOpcode =
+      (Variant == 0) ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1;
+  BuildMI(MBB, &MI, DL, TII->get(BranchOpcode)).addMBB(ContinueMBB);
+  LLVM_DEBUG(dbgs() << "ExpandCondBarrier: Variant " << Variant
+                    << " expansion\n");
 
   // 2. Insert barrier in fallthrough block.
   BuildMI(*BarrierMBB, BarrierMBB->end(), DL, TII->get(AMDGPU::S_BARRIER));
@@ -124,10 +128,13 @@ bool AMDGPUExpandCondBarrier::expandCondBarrier(MachineBasicBlock &MBB,
       .addMBB(ContinueMBB);
 
   // 4. Set up CFG with both paths.
-  // For S_CBRANCH_SCC0: SCC=0 -> branch to ContinueMBB, SCC=1 -> fallthrough to BarrierMBB
-  MBB.addSuccessor(BarrierMBB);      // Barrier path (implicit fallthrough when SCC=1)
-  MBB.addSuccessor(ContinueMBB);     // Skip barrier path (explicit branch target when SCC=0)
-  BarrierMBB->addSuccessor(ContinueMBB);  // Barrier to continue
+  // For S_CBRANCH_SCC0: SCC=0 -> branch to ContinueMBB, SCC=1 -> fallthrough to
+  // BarrierMBB
+  MBB.addSuccessor(
+      BarrierMBB); // Barrier path (implicit fallthrough when SCC=1)
+  MBB.addSuccessor(
+      ContinueMBB); // Skip barrier path (explicit branch target when SCC=0)
+  BarrierMBB->addSuccessor(ContinueMBB); // Barrier to continue
 
   // Remove the pseudo-instruction.
   MI.eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ac26a19bd..7d3f47b96 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2110,7 +2110,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     MI.setDesc(get(AMDGPU::V_READLANE_B32));
     break;
 
-
   case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
     Register Dst = MI.getOperand(0).getReg();
     bool IsAGPR = SIRegisterInfo::isAGPRClass(RI.getPhysRegBaseClass(Dst));

``````````

</details>


https://github.com/llvm/llvm-project/pull/171604