[llvm] [AMDGPU] Fix phi injection in si-i1-lowering (PR #179267)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 3 08:28:21 PST 2026
https://github.com/idubinov updated https://github.com/llvm/llvm-project/pull/179267
>From 269afdf2aa1d52d415fe4bb7a87ebc99d3a60039 Mon Sep 17 00:00:00 2001
From: idubinov <igor.dubinov at amd.com>
Date: Mon, 2 Feb 2026 08:41:31 -0600
Subject: [PATCH 1/6] Fix phi injection in si-i1-lowering
---
llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp | 173 +++++++++++++++---
llvm/lib/Target/AMDGPU/SILowerI1Copies.h | 7 +-
...-copies-implicit-def-unstructured-loop.mir | 30 +--
.../si-lower-i1-copies-phi-dependencies.mir | 164 +++++++++++++++++
4 files changed, 333 insertions(+), 41 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 5b329f46930ca..ef675eb958958 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -25,6 +25,8 @@
#include "AMDGPU.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/InitializePasses.h"
+#include <iterator>
+#include <optional>
#define DEBUG_TYPE "si-i1-copies"
@@ -389,8 +391,7 @@ insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
#ifndef NDEBUG
static bool isVRegCompatibleReg(const SIRegisterInfo &TRI,
- const MachineRegisterInfo &MRI,
- Register Reg) {
+ const MachineRegisterInfo &MRI, Register Reg) {
unsigned Size = TRI.getRegSizeInBits(Reg, MRI);
return Size == 1 || Size == 32;
}
@@ -447,9 +448,10 @@ PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
ST = &MF->getSubtarget<GCNSubtarget>();
TII = ST->getInstrInfo();
- IsWave32 = ST->isWave32();
+ WavefrontSize = ST->getWavefrontSize();
+ assert((WavefrontSize == 32 || WavefrontSize == 64));
- if (IsWave32) {
+ if (WavefrontSize == 32) {
ExecReg = AMDGPU::EXEC_LO;
MovOp = AMDGPU::S_MOV_B32;
AndOp = AMDGPU::S_AND_B32;
@@ -457,6 +459,8 @@ PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
XorOp = AMDGPU::S_XOR_B32;
AndN2Op = AMDGPU::S_ANDN2_B32;
OrN2Op = AMDGPU::S_ORN2_B32;
+ CSelectOp = AMDGPU::S_CSELECT_B32;
+ CmpLGOp = AMDGPU::S_CMP_LG_U32;
} else {
ExecReg = AMDGPU::EXEC;
MovOp = AMDGPU::S_MOV_B64;
@@ -465,9 +469,148 @@ PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
XorOp = AMDGPU::S_XOR_B64;
AndN2Op = AMDGPU::S_ANDN2_B64;
OrN2Op = AMDGPU::S_ORN2_B64;
+ CSelectOp = AMDGPU::S_CSELECT_B64;
+ CmpLGOp = AMDGPU::S_CMP_LG_U64;
}
}
+static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
+ Def = false;
+ Use = false;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
+ if (MO.isUse())
+ Use = true;
+ else
+ Def = true;
+ }
+ }
+}
+
+/// Move an instruction to a new position inside the same MBB if there are no
+/// conflicting operand dependencies. Updates InstrToMovePos to point past the
+/// moved instruction. Returns true if the instruction was moved, false if not.
+bool moveIfPossible(MachineBasicBlock &MBB,
+ llvm::MachineBasicBlock::iterator &InstrToMovePos,
+ const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
+ MachineInstr &MI = *InstrToMovePos;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ // Check if any operands are defined between current position and target
+ if (!MO.isReg())
+ continue;
+ if (MO.isUse()) {
+ for (auto I = std::next(MI.getIterator()); I != MoveAfterPos; ++I) {
+ for (const MachineOperand &MOI : I->operands())
+ if (MOI.isReg() && MOI.isDef() && MOI.getReg() == MO.getReg())
+ return false;
+ }
+ }
+
+ // Check if MI defines any register used before MoveAfterPos
+ if (MO.isDef()) {
+ for (auto I = MoveAfterPos; I != MI.getIterator(); --I) {
+ for (const MachineOperand &MOI : I->operands())
+ if (MOI.isReg() && MOI.isUse() && MOI.getReg() == MO.getReg())
+ return false;
+ }
+ }
+ }
+
+ MI.removeFromParent();
+ MBB.insertAfter(MoveAfterPos, &MI);
+ InstrToMovePos = MoveAfterPos;
+ InstrToMovePos++;
+ return true;
+}
+
+/// Insert mask calculation procedure.
+/// Finds a place for insertion, reorganize instruction if needed,
+/// store/restore SCC register if needed.
+void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
+ MachineBasicBlock &MBB = *Incoming.Block;
+ auto FirstTerminator = MBB.getFirstTerminator();
+
+ bool TerminatorsUseSCC = false;
+ for (auto I = FirstTerminator, E = MBB.end(); I != E; ++I) {
+ bool DefsSCC;
+ instrDefsUsesSCC(*I, DefsSCC, TerminatorsUseSCC);
+ if (TerminatorsUseSCC || DefsSCC)
+ break;
+ }
+
+ if (!TerminatorsUseSCC) {
+ buildMergeLaneMasks(MBB, FirstTerminator, {}, Incoming.UpdatedReg, DstReg,
+ Incoming.Reg);
+ return;
+ }
+
+ std::optional<llvm::MachineBasicBlock::iterator> sccDefPos, curRegDefPos;
+ for (auto I = FirstTerminator; I != MBB.begin(); --I) {
+ const llvm::iterator_range<llvm::MachineOperand *> IMO = I->operands();
+
+ for (const auto &MO : IMO) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ Register R = MO.getReg();
+
+ if (R == Incoming.Reg) {
+ curRegDefPos = I;
+ }
+
+ if (R == AMDGPU::SCC) {
+ sccDefPos = I;
+ break;
+ }
+ }
+
+ if (sccDefPos)
+ break;
+ }
+
+ assert(sccDefPos);
+
+ if (!curRegDefPos) {
+ /// No def of Incoming.Reg between the SCC def and the terminators, so it is safe to insert the merge before the SCC def.
+ buildMergeLaneMasks(MBB, sccDefPos.value(), {}, Incoming.UpdatedReg, DstReg,
+ Incoming.Reg);
+ return;
+ }
+
+ assert(curRegDefPos && std::distance(MBB.begin(), curRegDefPos.value()) >
+ std::distance(MBB.begin(), sccDefPos.value()));
+
+ /// Try to move the SCC-defining instruction past the def of Incoming.Reg.
+ if (moveIfPossible(MBB, sccDefPos.value(), curRegDefPos.value())) {
+ buildMergeLaneMasks(MBB, sccDefPos.value(), {}, Incoming.UpdatedReg, DstReg,
+ Incoming.Reg);
+ return;
+ }
+
+ /// If it cannot be moved, save and restore the SCC value around the merge.
+ curRegDefPos.value()++;
+
+ /// store SCC
+ Register SavedSCC = MRI->createVirtualRegister(
+ WavefrontSize == 32 ? &AMDGPU::SReg_32RegClass
+ : &AMDGPU::SReg_64RegClass);
+ BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CSelectOp), SavedSCC)
+ .addImm(1)
+ .addImm(0);
+
+ buildMergeLaneMasks(MBB, curRegDefPos.value(), {}, Incoming.UpdatedReg,
+ DstReg, Incoming.Reg);
+
+ /// restore SCC
+ BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CmpLGOp))
+ .addReg(SavedSCC)
+ .addImm(0)
+ .addReg(AMDGPU::SCC, RegState::ImplicitDefine);
+ return;
+}
+
bool PhiLoweringHelper::lowerPhis() {
MachineSSAUpdater SSAUpdater(*MF);
LoopFinder LF(*DT, *PDT);
@@ -537,9 +680,7 @@ bool PhiLoweringHelper::lowerPhis() {
for (auto &Incoming : Incomings) {
MachineBasicBlock &IMBB = *Incoming.Block;
- buildMergeLaneMasks(
- IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
- SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
+ insertMask(Incoming, SSAUpdater.GetValueInMiddleOfBlock(&IMBB));
}
} else {
// The phi is not observed from outside a loop. Use a more accurate
@@ -566,9 +707,7 @@ bool PhiLoweringHelper::lowerPhis() {
continue;
MachineBasicBlock &IMBB = *Incoming.Block;
- buildMergeLaneMasks(
- IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
- SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
+ insertMask(Incoming, SSAUpdater.GetValueInMiddleOfBlock(&IMBB));
}
}
@@ -696,20 +835,6 @@ bool PhiLoweringHelper::isConstantLaneMask(Register Reg, bool &Val) const {
return false;
}
-static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
- Def = false;
- Use = false;
-
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
- if (MO.isUse())
- Use = true;
- else
- Def = true;
- }
- }
-}
-
/// Return a point at the end of the given \p MBB to insert SALU instructions
/// for lane mask calculation. Take terminators and SCC into account.
MachineBasicBlock::iterator
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
index fd90328c2b926..bf96a07a611f4 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -20,7 +20,7 @@
namespace llvm {
-/// Incoming for lane maks phi as machine instruction, incoming register \p Reg
+/// Incoming for lane mask phi as machine instruction, incoming register \p Reg
/// and incoming block \p Block are taken from machine instruction.
/// \p UpdatedReg (if valid) is \p Reg lane mask merged with another lane mask.
struct Incoming {
@@ -42,7 +42,7 @@ class PhiLoweringHelper {
virtual ~PhiLoweringHelper() = default;
protected:
- bool IsWave32 = false;
+ unsigned WavefrontSize;
MachineFunction *MF = nullptr;
MachineDominatorTree *DT = nullptr;
MachinePostDominatorTree *PDT = nullptr;
@@ -62,12 +62,15 @@ class PhiLoweringHelper {
unsigned XorOp;
unsigned AndN2Op;
unsigned OrN2Op;
+ unsigned CSelectOp;
+ unsigned CmpLGOp;
public:
bool lowerPhis();
bool isConstantLaneMask(Register Reg, bool &Val) const;
MachineBasicBlock::iterator
getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
+ void insertMask(const Incoming &Incoming, Register DstReg);
void initializeLaneMaskRegisterAttributes(Register LaneMask) {
LaneMaskRegAttrs = MRI->getVRegAttrs(LaneMask);
diff --git a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
index 028d511c6bf86..50219b38e636a 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
@@ -16,7 +16,7 @@ machineFunctionInfo:
body: |
; CHECK-LABEL: name: recursive_vreg_1_phi
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
@@ -47,23 +47,23 @@ body: |
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; CHECK-NEXT: [[DEF6:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-LABEL: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.3(0x{{[0-9a-fA-F]+}})
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF6]], %bb.0, %31, %bb.3
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %54, %bb.3
- ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3
- ; CHECK-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; CHECK-NEXT: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]]
- ; CHECK-NEXT: S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc
- ; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-DAG: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF6]], %bb.0, %31, %bb.3
+ ; CHECK-DAG: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %54, %bb.3
+ ; CHECK-DAG: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3
+ ; CHECK-DAG: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK-DAG: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+ ; CHECK-DAG: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]]
+ ; CHECK-DAG: S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc
+ ; CHECK-DAG: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+ ; CHECK-DAG: [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit $scc
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-LABEL: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x{{[0-9a-fA-F]+}})
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
; CHECK-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 6
@@ -83,7 +83,7 @@ body: |
; CHECK-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
- ; CHECK-NEXT: successors: %bb.4(0x00000000), %bb.1(0x80000000)
+ ; CHECK-NEXT: successors: %bb.4(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY6]], %bb.1, [[S_OR_B64_1]], %bb.2
; CHECK-NEXT: [[PHI4:%[0-9]+]]:sreg_64 = PHI [[PHI1]], %bb.1, [[DEF9]], %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir
new file mode 100644
index 0000000000000..39fc7e34505d1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir
@@ -0,0 +1,164 @@
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-i1-copies -o - %s | FileCheck -check-prefixes=GCN %s
+
+---
+name: phi_with_dependencies1
+# SCC def instruction (S_CMP_LG_U32) is below the PHI dependency (%17)
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: phi_with_dependencies1
+ ; GCN-LABEL: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+ ; GCN-NEXT: {{ $}}
+ ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-LABEL: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
+ ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; GCN-NEXT: [[REG3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: [[V_CMP:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[REG3]], 1, implicit $exec
+ ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[NEW_PHI]], $exec, implicit-def $scc
+ ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP]], $exec, implicit-def $scc
+ ; GCN-NEXT: [[S_OR]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
+ ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-LABEL: bb.2:
+ ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x80000000)
+ %0:sreg_32 = S_MOV_B32 1
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2, %bb.1
+ %13:sreg_32 = S_MOV_B32 2
+ %16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %17:sreg_64 = V_CMP_EQ_U32_e64 killed %16:vgpr_32, 1, implicit $exec
+ %1:vreg_1 = COPY %17:sreg_64
+ S_CMP_LG_U32 %0:sreg_32, killed %13:sreg_32, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ ; predecessors: %bb.1
+ %2:vreg_1 = PHI %1:vreg_1, %bb.1
+ %19:sreg_64_xexec = COPY %2:vreg_1
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: phi_with_dependencies2
+---
+name: phi_with_dependencies2
+# SCC def instruction (S_CMP_LG_U32) is above the PHI dependency (%17) definition and can be lowered
+tracksRegLiveness: true
+body: |
+
+ ; GCN-LABEL: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+ ; GCN-NEXT: {{ $}}
+ ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GCN-NEXT: S_BRANCH %bb.1
+
+ ; GCN-LABEL: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
+ ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; GCN-NEXT: [[V_MOV:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: [[V_CMP_EQ:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV]], 1, implicit $exec
+ ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 %9, $exec, implicit-def $scc
+ ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ]], $exec, implicit-def $scc
+ ; GCN-NEXT: [[S_OR:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
+ ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+
+ ; GCN-LABEL: bb.2:
+ ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
+ ; GCN-NEXT: S_ENDPGM 0
+
+ bb.0:
+ successors: %bb.1(0x80000000); %bb.1(100.00%)
+ %0:sreg_32 = S_MOV_B32 1
+ S_BRANCH %bb.1
+
+ bb.1:
+ ; predecessors: %bb.0, %bb.1
+ successors: %bb.2, %bb.1
+
+ %13:sreg_32 = S_MOV_B32 2
+ S_CMP_LG_U32 %0:sreg_32, killed %13:sreg_32, implicit-def $scc
+ %16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %17:sreg_64 = V_CMP_EQ_U32_e64 killed %16:vgpr_32, 1, implicit $exec
+ %1:vreg_1 = COPY %17:sreg_64
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ ; predecessors: %bb.1
+ %2:vreg_1 = PHI %1:vreg_1, %bb.1
+ %19:sreg_64_xexec = COPY %2:vreg_1
+ S_ENDPGM 0
+...
+
+
+# GCN-LABEL: name: phi_with_dependencies3
+---
+name: phi_with_dependencies3
+# SCC def instruction (V_MOV_B32_e32) is above the PHI dependency (%17) definition and cannot be lowered
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+ ; GCN-NEXT: {{ $}}
+ ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-LABEL: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
+ ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
+ ; GCN-NEXT: [[REG3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: [[V_CMP:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[REG3]], 1, implicit $exec
+ ; GCN-NEXT: [[HIDE_CSS:%[0-9]+]]:sreg_64 = S_CSELECT_B64 1, 0, implicit $scc
+ ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[NEW_PHI]], $exec, implicit-def $scc
+ ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP]], $exec, implicit-def $scc
+ ; GCN-NEXT: [[S_OR]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
+ ; GCN-NEXT: S_CMP_LG_U64 [[HIDE_CSS]], 0, implicit-def $scc, implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-LABEL: bb.2:
+ ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
+ ; GCN-NEXT: S_ENDPGM 0
+
+ bb.0:
+ successors: %bb.1(0x80000000); %bb.1(100.00%)
+ %0:sreg_32 = S_MOV_B32 1
+ S_BRANCH %bb.1
+
+ bb.1:
+ ; predecessors: %bb.0, %bb.1
+ successors: %bb.2(0x40000000), %bb.1(0x40000000); %bb.2(50.00%), %bb.1(50.00%)
+
+ %1:sreg_32 = S_MOV_B32 2
+ S_CMP_LG_U32 %0:sreg_32, killed %1:sreg_32, implicit-def $scc
+ %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec, implicit-def $scc
+ %4:sreg_64 = V_CMP_EQ_U32_e64 killed %2:vgpr_32, 1, implicit $exec
+ %5:vreg_1 = COPY %4:sreg_64
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ ; predecessors: %bb.1
+ %6:vreg_1 = PHI %5:vreg_1, %bb.1
+ %7:sreg_64_xexec = COPY %6:vreg_1
+ S_ENDPGM 0
+
+...
>From 4d4d326aa3b859955042cd315a13e26d2a87f2ce Mon Sep 17 00:00:00 2001
From: idubinov <igor.dubinov at amd.com>
Date: Mon, 2 Feb 2026 10:13:24 -0600
Subject: [PATCH 2/6] Autogenerated tests
---
...-copies-implicit-def-unstructured-loop.mir | 30 ++--
.../si-lower-i1-copies-phi-dependencies.mir | 140 +++++++++---------
2 files changed, 86 insertions(+), 84 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
index 50219b38e636a..028d511c6bf86 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
@@ -16,7 +16,7 @@ machineFunctionInfo:
body: |
; CHECK-LABEL: name: recursive_vreg_1_phi
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
@@ -47,23 +47,23 @@ body: |
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; CHECK-NEXT: [[DEF6:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
- ; CHECK-LABEL: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.3(0x{{[0-9a-fA-F]+}})
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-DAG: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF6]], %bb.0, %31, %bb.3
- ; CHECK-DAG: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %54, %bb.3
- ; CHECK-DAG: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3
- ; CHECK-DAG: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; CHECK-DAG: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
- ; CHECK-DAG: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]]
- ; CHECK-DAG: S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc
- ; CHECK-DAG: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
- ; CHECK-DAG: [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF6]], %bb.0, %31, %bb.3
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %54, %bb.3
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3
+ ; CHECK-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]]
+ ; CHECK-NEXT: S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc
+ ; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit $scc
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
- ; CHECK-LABEL: bb.2:
- ; CHECK-NEXT: successors: %bb.3(0x{{[0-9a-fA-F]+}})
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
; CHECK-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 6
@@ -83,7 +83,7 @@ body: |
; CHECK-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
- ; CHECK-NEXT: successors: %bb.4(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+ ; CHECK-NEXT: successors: %bb.4(0x00000000), %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY6]], %bb.1, [[S_OR_B64_1]], %bb.2
; CHECK-NEXT: [[PHI4:%[0-9]+]]:sreg_64 = PHI [[PHI1]], %bb.1, [[DEF9]], %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir
index 39fc7e34505d1..9320735d46592 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-i1-copies -o - %s | FileCheck -check-prefixes=GCN %s
---
@@ -6,28 +7,30 @@ name: phi_with_dependencies1
tracksRegLiveness: true
body: |
; GCN-LABEL: name: phi_with_dependencies1
- ; GCN-LABEL: bb.0:
- ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN-NEXT: {{ $}}
- ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; GCN-NEXT: S_BRANCH %bb.1
- ; GCN-LABEL: bb.1:
- ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GCN-NEXT: S_BRANCH %bb.1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
- ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
- ; GCN-NEXT: [[REG3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[V_CMP:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[REG3]], 1, implicit $exec
- ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[NEW_PHI]], $exec, implicit-def $scc
- ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP]], $exec, implicit-def $scc
- ; GCN-NEXT: [[S_OR]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
- ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
- ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-LABEL: bb.2:
- ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
- ; GCN-NEXT: S_ENDPGM 0
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.0, %5, %bb.1
+ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV_B32_e32_]], 1, implicit $exec
+ ; GCN-NEXT: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+ ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], $exec, implicit-def $scc
+ ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_]], [[S_AND_B64_]], implicit-def $scc
+ ; GCN-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B64_]]
+ ; GCN-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000)
%0:sreg_32 = S_MOV_B32 1
@@ -56,32 +59,31 @@ name: phi_with_dependencies2
# SCC def instruction (S_CMP_LG_U32) is above the PHI dependency (%17) definition and can be lowered
tracksRegLiveness: true
body: |
-
- ; GCN-LABEL: bb.0:
- ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
- ; GCN-NEXT: {{ $}}
- ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; GCN-NEXT: S_BRANCH %bb.1
-
- ; GCN-LABEL: bb.1:
- ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+ ; GCN-LABEL: name: phi_with_dependencies2
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
- ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
- ; GCN-NEXT: [[V_MOV:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[V_CMP_EQ:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV]], 1, implicit $exec
- ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 %9, $exec, implicit-def $scc
- ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ]], $exec, implicit-def $scc
- ; GCN-NEXT: [[S_OR:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
- ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
- ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
- ; GCN-NEXT: S_BRANCH %bb.2
-
- ; GCN-LABEL: bb.2:
- ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
- ; GCN-NEXT: S_ENDPGM 0
-
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.0, %5, %bb.1
+ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; GCN-NEXT: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+ ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 %3, $exec, implicit-def $scc
+ ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_]], [[S_AND_B64_]], implicit-def $scc
+ ; GCN-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV_B32_e32_]], 1, implicit $exec
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B64_]]
+ ; GCN-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000); %bb.1(100.00%)
%0:sreg_32 = S_MOV_B32 1
@@ -113,31 +115,31 @@ name: phi_with_dependencies3
# SCC def instruction (V_MOV_B32_e32) is above the PHI dependency (%17) definition and cannot be lowered
tracksRegLiveness: true
body: |
- ; GCN-LABEL: bb.0:
- ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+ ; GCN-LABEL: name: phi_with_dependencies3
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN-NEXT: {{ $}}
- ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; GCN-NEXT: S_BRANCH %bb.1
- ; GCN-LABEL: bb.1:
- ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
- ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
- ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
- ; GCN-NEXT: [[REG3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[V_CMP:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[REG3]], 1, implicit $exec
- ; GCN-NEXT: [[HIDE_CSS:%[0-9]+]]:sreg_64 = S_CSELECT_B64 1, 0, implicit $scc
- ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[NEW_PHI]], $exec, implicit-def $scc
- ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP]], $exec, implicit-def $scc
- ; GCN-NEXT: [[S_OR]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
- ; GCN-NEXT: S_CMP_LG_U64 [[HIDE_CSS]], 0, implicit-def $scc, implicit-def $scc
- ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-LABEL: bb.2:
- ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
- ; GCN-NEXT: S_ENDPGM 0
-
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.0, %5, %bb.1
+ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; GCN-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+ ; GCN-NEXT: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+ ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 %3, $exec, implicit-def $scc
+ ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_]], [[S_AND_B64_]], implicit-def $scc
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec, implicit-def $scc
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV_B32_e32_]], 1, implicit $exec
+ ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B64_]]
+ ; GCN-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1(0x80000000); %bb.1(100.00%)
%0:sreg_32 = S_MOV_B32 1
>From 35bb2b01e34f749e531182bf0ea39014b03b236e Mon Sep 17 00:00:00 2001
From: idubinov <53053614+idubinov at users.noreply.github.com>
Date: Mon, 2 Feb 2026 17:33:05 +0100
Subject: [PATCH 3/6] Apply suggestions from code review
Co-authored-by: Shilei Tian <i at tianshilei.me>
---
llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index ef675eb958958..a23a3c895d6ed 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -491,7 +491,7 @@ static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
/// Move instruction to a new position inside the same MBB, if there is no
/// operand's dependencies. Change the InstrToMovePos after the moved
/// instruction. returns true if instruction moved, false if not.
-bool moveIfPossible(MachineBasicBlock &MBB,
+static bool moveIfPossible(MachineBasicBlock &MBB,
llvm::MachineBasicBlock::iterator &InstrToMovePos,
const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
MachineInstr &MI = *InstrToMovePos;
@@ -502,18 +502,20 @@ bool moveIfPossible(MachineBasicBlock &MBB,
continue;
if (MO.isUse()) {
for (auto I = std::next(MI.getIterator()); I != MoveAfterPos; ++I) {
- for (const MachineOperand &MOI : I->operands())
+ for (const MachineOperand &MOI : I->operands()) {
if (MOI.isReg() && MOI.isDef() && MOI.getReg() == MO.getReg())
return false;
+ }
}
}
// Check if MI defines any register used before InsertPos
if (MO.isDef()) {
for (auto I = MoveAfterPos; I != MI.getIterator(); --I) {
- for (const MachineOperand &MOI : I->operands())
+ for (const MachineOperand &MOI : I->operands()) {
if (MOI.isReg() && MOI.isUse() && MOI.getReg() == MO.getReg())
return false;
+ }
}
}
}
@@ -556,9 +558,8 @@ void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
Register R = MO.getReg();
- if (R == Incoming.Reg) {
+ if (R == Incoming.Reg)
curRegDefPos = I;
- }
if (R == AMDGPU::SCC) {
sccDefPos = I;
>From 81dd10e03f0a3830f29eabfbf1a34bdaec81d508 Mon Sep 17 00:00:00 2001
From: idubinov <53053614+idubinov at users.noreply.github.com>
Date: Mon, 2 Feb 2026 17:33:42 +0100
Subject: [PATCH 4/6] Update llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
Co-authored-by: Shilei Tian <i at tianshilei.me>
---
llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index a23a3c895d6ed..fdc73505a31d7 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -609,7 +609,6 @@ void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
.addReg(SavedSCC)
.addImm(0)
.addReg(AMDGPU::SCC, RegState::ImplicitDefine);
- return;
}
bool PhiLoweringHelper::lowerPhis() {
>From c4f29b3a956f1b44666c4ebad12f293106a8a02c Mon Sep 17 00:00:00 2001
From: idubinov <igor.dubinov at amd.com>
Date: Tue, 3 Feb 2026 08:33:37 -0600
Subject: [PATCH 5/6] Use LaneMaskConstants
---
.../AMDGPUGlobalISelDivergenceLowering.cpp | 8 +--
llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h | 4 ++
llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp | 63 +++++++------------
llvm/lib/Target/AMDGPU/SILowerI1Copies.h | 14 +----
4 files changed, 32 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
index f924335844da2..d670a55dbeefc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
@@ -173,9 +173,9 @@ void DivergenceLoweringHelper::buildMergeLaneMasks(
Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
B.setInsertPt(MBB, I);
- B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
- B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
- B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
+ B.buildInstr(LMC.AndN2Opc, {PrevMaskedReg}, {PrevRegCopy, LMC.ExecReg});
+ B.buildInstr(LMC.AndOpc, {CurMaskedReg}, {LMC.ExecReg, CurRegCopy});
+ B.buildInstr(LMC.OrOpc, {DstReg}, {PrevMaskedReg, CurMaskedReg});
}
// GlobalISel has to constrain S1 incoming taken as-is with lane mask register
@@ -218,7 +218,7 @@ bool DivergenceLoweringHelper::lowerTemporalDivergence() {
Register VgprReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
B.buildInstr(AMDGPU::COPY, {VgprReg}, {Reg})
- .addUse(ExecReg, RegState::Implicit);
+ .addUse(LMC.ExecReg, RegState::Implicit);
replaceUsesOfRegInInstWith(Reg, UseInst, VgprReg);
TDCache[Reg] = VgprReg;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
index df80196d95176..95d88c7af368c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
@@ -31,10 +31,12 @@ class LaneMaskConstants {
const unsigned AndSaveExecTermOpc;
const unsigned BfmOpc;
const unsigned CMovOpc;
+ const unsigned CmpLGOp;
const unsigned CSelectOpc;
const unsigned MovOpc;
const unsigned MovTermOpc;
const unsigned OrOpc;
+ const unsigned OrN2Op;
const unsigned OrTermOpc;
const unsigned OrSaveExecOpc;
const unsigned XorOpc;
@@ -57,10 +59,12 @@ class LaneMaskConstants {
: AMDGPU::S_AND_SAVEEXEC_B64_term),
BfmOpc(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64),
CMovOpc(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
+ CmpLGOp(IsWave32 ? AMDGPU::S_CMP_LG_U32 : AMDGPU::S_CMP_LG_U64),
CSelectOpc(IsWave32 ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64),
MovOpc(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64),
MovTermOpc(IsWave32 ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term),
OrOpc(IsWave32 ? AMDGPU::S_OR_B32 : AMDGPU::S_OR_B64),
+ OrN2Op(IsWave32 ? AMDGPU::S_ORN2_B32 : AMDGPU::S_ORN2_B64),
OrTermOpc(IsWave32 ? AMDGPU::S_OR_B32_term : AMDGPU::S_OR_B64_term),
OrSaveExecOpc(IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32
: AMDGPU::S_OR_SAVEEXEC_B64),
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index fdc73505a31d7..de17df71fc936 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -23,6 +23,7 @@
#include "SILowerI1Copies.h"
#include "AMDGPU.h"
+#include "AMDGPULaneMaskUtils.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/InitializePasses.h"
#include <iterator>
@@ -443,35 +444,12 @@ bool Vreg1LoweringHelper::lowerCopiesFromI1() {
PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
MachineDominatorTree *DT,
MachinePostDominatorTree *PDT)
- : MF(MF), DT(DT), PDT(PDT) {
+ : MF(MF), DT(DT), PDT(PDT),
+ LMC(AMDGPU::LaneMaskConstants::get(MF->getSubtarget<GCNSubtarget>())) {
MRI = &MF->getRegInfo();
ST = &MF->getSubtarget<GCNSubtarget>();
TII = ST->getInstrInfo();
- WavefrontSize = ST->getWavefrontSize();
- assert((WavefrontSize == 32 || WavefrontSize == 64));
-
- if (WavefrontSize == 32) {
- ExecReg = AMDGPU::EXEC_LO;
- MovOp = AMDGPU::S_MOV_B32;
- AndOp = AMDGPU::S_AND_B32;
- OrOp = AMDGPU::S_OR_B32;
- XorOp = AMDGPU::S_XOR_B32;
- AndN2Op = AMDGPU::S_ANDN2_B32;
- OrN2Op = AMDGPU::S_ORN2_B32;
- CSelectOp = AMDGPU::S_CSELECT_B32;
- CmpLGOp = AMDGPU::S_CMP_LG_U32;
- } else {
- ExecReg = AMDGPU::EXEC;
- MovOp = AMDGPU::S_MOV_B64;
- AndOp = AMDGPU::S_AND_B64;
- OrOp = AMDGPU::S_OR_B64;
- XorOp = AMDGPU::S_XOR_B64;
- AndN2Op = AMDGPU::S_ANDN2_B64;
- OrN2Op = AMDGPU::S_ORN2_B64;
- CSelectOp = AMDGPU::S_CSELECT_B64;
- CmpLGOp = AMDGPU::S_CMP_LG_U64;
- }
}
static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
@@ -491,9 +469,10 @@ static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
/// Move instruction to a new position inside the same MBB, if there is no
/// operand's dependencies. Change the InstrToMovePos after the moved
/// instruction. returns true if instruction moved, false if not.
-static bool moveIfPossible(MachineBasicBlock &MBB,
- llvm::MachineBasicBlock::iterator &InstrToMovePos,
- const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
+static bool
+moveIfPossible(MachineBasicBlock &MBB,
+ llvm::MachineBasicBlock::iterator &InstrToMovePos,
+ const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
MachineInstr &MI = *InstrToMovePos;
for (const MachineOperand &MO : MI.operands()) {
@@ -597,7 +576,7 @@ void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
Register SavedSCC = MRI->createVirtualRegister(
WavefrontSize == 32 ? &AMDGPU::SReg_32RegClass
: &AMDGPU::SReg_64RegClass);
- BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CSelectOp), SavedSCC)
+ BuildMI(MBB, curRegDefPos.value(), {}, TII->get(LMC.CSelectOpc), SavedSCC)
.addImm(1)
.addImm(0);
@@ -605,7 +584,7 @@ void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
DstReg, Incoming.Reg);
/// restore SCC
- BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CmpLGOp))
+ BuildMI(MBB, curRegDefPos.value(), {}, TII->get(LMC.CmpLGOp))
.addReg(SavedSCC)
.addImm(0)
.addReg(AMDGPU::SCC, RegState::ImplicitDefine);
@@ -816,7 +795,7 @@ bool PhiLoweringHelper::isConstantLaneMask(Register Reg, bool &Val) const {
return false;
}
- if (MI->getOpcode() != MovOp)
+ if (MI->getOpcode() != LMC.MovOpc)
return false;
if (!MI->getOperand(1).isImm())
@@ -920,10 +899,10 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
if (PrevVal == CurVal) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
} else if (CurVal) {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(ExecReg);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(LMC.ExecReg);
} else {
- BuildMI(MBB, I, DL, TII->get(XorOp), DstReg)
- .addReg(ExecReg)
+ BuildMI(MBB, I, DL, TII->get(LMC.XorOpc), DstReg)
+ .addReg(LMC.ExecReg)
.addImm(-1);
}
return;
@@ -936,9 +915,9 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
PrevMaskedReg = PrevReg;
} else {
PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
+ BuildMI(MBB, I, DL, TII->get(LMC.AndN2Opc), PrevMaskedReg)
.addReg(PrevReg)
- .addReg(ExecReg);
+ .addReg(LMC.ExecReg);
}
}
if (!CurConstant) {
@@ -947,9 +926,9 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
CurMaskedReg = CurReg;
} else {
CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
+ BuildMI(MBB, I, DL, TII->get(LMC.AndOpc), CurMaskedReg)
.addReg(CurReg)
- .addReg(ExecReg);
+ .addReg(LMC.ExecReg);
}
}
@@ -960,13 +939,13 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
.addReg(PrevMaskedReg);
} else if (PrevConstant && PrevVal) {
- BuildMI(MBB, I, DL, TII->get(OrN2Op), DstReg)
+ BuildMI(MBB, I, DL, TII->get(LMC.OrN2Op), DstReg)
.addReg(CurMaskedReg)
- .addReg(ExecReg);
+ .addReg(LMC.ExecReg);
} else {
- BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
+ BuildMI(MBB, I, DL, TII->get(LMC.OrOpc), DstReg)
.addReg(PrevMaskedReg)
- .addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
+ .addReg(CurMaskedReg ? CurMaskedReg : LMC.ExecReg);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
index bf96a07a611f4..ee8d3c713143d 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPULaneMaskUtils.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachinePostDominators.h"
@@ -42,7 +43,7 @@ class PhiLoweringHelper {
virtual ~PhiLoweringHelper() = default;
protected:
- unsigned WavefrontSize;
+ unsigned WavefrontSize; /// grem
MachineFunction *MF = nullptr;
MachineDominatorTree *DT = nullptr;
MachinePostDominatorTree *PDT = nullptr;
@@ -50,21 +51,12 @@ class PhiLoweringHelper {
const GCNSubtarget *ST = nullptr;
const SIInstrInfo *TII = nullptr;
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs;
+ const AMDGPU::LaneMaskConstants &LMC;
#ifndef NDEBUG
DenseSet<Register> PhiRegisters;
#endif
- Register ExecReg;
- unsigned MovOp;
- unsigned AndOp;
- unsigned OrOp;
- unsigned XorOp;
- unsigned AndN2Op;
- unsigned OrN2Op;
- unsigned CSelectOp;
- unsigned CmpLGOp;
-
public:
bool lowerPhis();
bool isConstantLaneMask(Register Reg, bool &Val) const;
>From 1da5e7811c06eb58fe35db9e30a369d4229e4c9a Mon Sep 17 00:00:00 2001
From: idubinov <igor.dubinov at amd.com>
Date: Tue, 3 Feb 2026 10:15:27 -0600
Subject: [PATCH 6/6] Revert "Use LaneMaskConstants"
This reverts commit c4f29b3a956f1b44666c4ebad12f293106a8a02c.
---
.../AMDGPUGlobalISelDivergenceLowering.cpp | 8 +--
llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h | 4 --
llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp | 63 ++++++++++++-------
llvm/lib/Target/AMDGPU/SILowerI1Copies.h | 14 ++++-
4 files changed, 57 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
index d670a55dbeefc..f924335844da2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
@@ -173,9 +173,9 @@ void DivergenceLoweringHelper::buildMergeLaneMasks(
Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
B.setInsertPt(MBB, I);
- B.buildInstr(LMC.AndN2Opc, {PrevMaskedReg}, {PrevRegCopy, LMC.ExecReg});
- B.buildInstr(LMC.AndOpc, {CurMaskedReg}, {LMC.ExecReg, CurRegCopy});
- B.buildInstr(LMC.OrOpc, {DstReg}, {PrevMaskedReg, CurMaskedReg});
+ B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
+ B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
+ B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
}
// GlobalISel has to constrain S1 incoming taken as-is with lane mask register
@@ -218,7 +218,7 @@ bool DivergenceLoweringHelper::lowerTemporalDivergence() {
Register VgprReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
B.buildInstr(AMDGPU::COPY, {VgprReg}, {Reg})
- .addUse(LMC.ExecReg, RegState::Implicit);
+ .addUse(ExecReg, RegState::Implicit);
replaceUsesOfRegInInstWith(Reg, UseInst, VgprReg);
TDCache[Reg] = VgprReg;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
index 95d88c7af368c..df80196d95176 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
@@ -31,12 +31,10 @@ class LaneMaskConstants {
const unsigned AndSaveExecTermOpc;
const unsigned BfmOpc;
const unsigned CMovOpc;
- const unsigned CmpLGOp;
const unsigned CSelectOpc;
const unsigned MovOpc;
const unsigned MovTermOpc;
const unsigned OrOpc;
- const unsigned OrN2Op;
const unsigned OrTermOpc;
const unsigned OrSaveExecOpc;
const unsigned XorOpc;
@@ -59,12 +57,10 @@ class LaneMaskConstants {
: AMDGPU::S_AND_SAVEEXEC_B64_term),
BfmOpc(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64),
CMovOpc(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
- CmpLGOp(IsWave32 ? AMDGPU::S_CMP_LG_U32 : AMDGPU::S_CMP_LG_U64),
CSelectOpc(IsWave32 ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64),
MovOpc(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64),
MovTermOpc(IsWave32 ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term),
OrOpc(IsWave32 ? AMDGPU::S_OR_B32 : AMDGPU::S_OR_B64),
- OrN2Op(IsWave32 ? AMDGPU::S_ORN2_B32 : AMDGPU::S_ORN2_B64),
OrTermOpc(IsWave32 ? AMDGPU::S_OR_B32_term : AMDGPU::S_OR_B64_term),
OrSaveExecOpc(IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32
: AMDGPU::S_OR_SAVEEXEC_B64),
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index de17df71fc936..fdc73505a31d7 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -23,7 +23,6 @@
#include "SILowerI1Copies.h"
#include "AMDGPU.h"
-#include "AMDGPULaneMaskUtils.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/InitializePasses.h"
#include <iterator>
@@ -444,12 +443,35 @@ bool Vreg1LoweringHelper::lowerCopiesFromI1() {
PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
MachineDominatorTree *DT,
MachinePostDominatorTree *PDT)
- : MF(MF), DT(DT), PDT(PDT),
- LMC(AMDGPU::LaneMaskConstants::get(MF->getSubtarget<GCNSubtarget>())) {
+ : MF(MF), DT(DT), PDT(PDT) {
MRI = &MF->getRegInfo();
ST = &MF->getSubtarget<GCNSubtarget>();
TII = ST->getInstrInfo();
+ WavefrontSize = ST->getWavefrontSize();
+ assert((WavefrontSize == 32 || WavefrontSize == 64));
+
+ if (WavefrontSize == 32) {
+ ExecReg = AMDGPU::EXEC_LO;
+ MovOp = AMDGPU::S_MOV_B32;
+ AndOp = AMDGPU::S_AND_B32;
+ OrOp = AMDGPU::S_OR_B32;
+ XorOp = AMDGPU::S_XOR_B32;
+ AndN2Op = AMDGPU::S_ANDN2_B32;
+ OrN2Op = AMDGPU::S_ORN2_B32;
+ CSelectOp = AMDGPU::S_CSELECT_B32;
+ CmpLGOp = AMDGPU::S_CMP_LG_U32;
+ } else {
+ ExecReg = AMDGPU::EXEC;
+ MovOp = AMDGPU::S_MOV_B64;
+ AndOp = AMDGPU::S_AND_B64;
+ OrOp = AMDGPU::S_OR_B64;
+ XorOp = AMDGPU::S_XOR_B64;
+ AndN2Op = AMDGPU::S_ANDN2_B64;
+ OrN2Op = AMDGPU::S_ORN2_B64;
+ CSelectOp = AMDGPU::S_CSELECT_B64;
+ CmpLGOp = AMDGPU::S_CMP_LG_U64;
+ }
}
static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
@@ -469,10 +491,9 @@ static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
/// Move instruction to a new position inside the same MBB, if there is no
/// operand's dependencies. Change the InstrToMovePos after the moved
/// instruction. returns true if instruction moved, false if not.
-static bool
-moveIfPossible(MachineBasicBlock &MBB,
- llvm::MachineBasicBlock::iterator &InstrToMovePos,
- const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
+static bool moveIfPossible(MachineBasicBlock &MBB,
+ llvm::MachineBasicBlock::iterator &InstrToMovePos,
+ const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
MachineInstr &MI = *InstrToMovePos;
for (const MachineOperand &MO : MI.operands()) {
@@ -576,7 +597,7 @@ void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
Register SavedSCC = MRI->createVirtualRegister(
WavefrontSize == 32 ? &AMDGPU::SReg_32RegClass
: &AMDGPU::SReg_64RegClass);
- BuildMI(MBB, curRegDefPos.value(), {}, TII->get(LMC.CSelectOpc), SavedSCC)
+ BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CSelectOp), SavedSCC)
.addImm(1)
.addImm(0);
@@ -584,7 +605,7 @@ void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
DstReg, Incoming.Reg);
/// restore SCC
- BuildMI(MBB, curRegDefPos.value(), {}, TII->get(LMC.CmpLGOp))
+ BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CmpLGOp))
.addReg(SavedSCC)
.addImm(0)
.addReg(AMDGPU::SCC, RegState::ImplicitDefine);
@@ -795,7 +816,7 @@ bool PhiLoweringHelper::isConstantLaneMask(Register Reg, bool &Val) const {
return false;
}
- if (MI->getOpcode() != LMC.MovOpc)
+ if (MI->getOpcode() != MovOp)
return false;
if (!MI->getOperand(1).isImm())
@@ -899,10 +920,10 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
if (PrevVal == CurVal) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
} else if (CurVal) {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(LMC.ExecReg);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(ExecReg);
} else {
- BuildMI(MBB, I, DL, TII->get(LMC.XorOpc), DstReg)
- .addReg(LMC.ExecReg)
+ BuildMI(MBB, I, DL, TII->get(XorOp), DstReg)
+ .addReg(ExecReg)
.addImm(-1);
}
return;
@@ -915,9 +936,9 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
PrevMaskedReg = PrevReg;
} else {
PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- BuildMI(MBB, I, DL, TII->get(LMC.AndN2Opc), PrevMaskedReg)
+ BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
.addReg(PrevReg)
- .addReg(LMC.ExecReg);
+ .addReg(ExecReg);
}
}
if (!CurConstant) {
@@ -926,9 +947,9 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
CurMaskedReg = CurReg;
} else {
CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
- BuildMI(MBB, I, DL, TII->get(LMC.AndOpc), CurMaskedReg)
+ BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
.addReg(CurReg)
- .addReg(LMC.ExecReg);
+ .addReg(ExecReg);
}
}
@@ -939,13 +960,13 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
.addReg(PrevMaskedReg);
} else if (PrevConstant && PrevVal) {
- BuildMI(MBB, I, DL, TII->get(LMC.OrN2Op), DstReg)
+ BuildMI(MBB, I, DL, TII->get(OrN2Op), DstReg)
.addReg(CurMaskedReg)
- .addReg(LMC.ExecReg);
+ .addReg(ExecReg);
} else {
- BuildMI(MBB, I, DL, TII->get(LMC.OrOpc), DstReg)
+ BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
.addReg(PrevMaskedReg)
- .addReg(CurMaskedReg ? CurMaskedReg : LMC.ExecReg);
+ .addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
index ee8d3c713143d..bf96a07a611f4 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPULaneMaskUtils.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachinePostDominators.h"
@@ -43,7 +42,7 @@ class PhiLoweringHelper {
virtual ~PhiLoweringHelper() = default;
protected:
- unsigned WavefrontSize; /// grem
+ unsigned WavefrontSize;
MachineFunction *MF = nullptr;
MachineDominatorTree *DT = nullptr;
MachinePostDominatorTree *PDT = nullptr;
@@ -51,12 +50,21 @@ class PhiLoweringHelper {
const GCNSubtarget *ST = nullptr;
const SIInstrInfo *TII = nullptr;
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs;
- const AMDGPU::LaneMaskConstants &LMC;
#ifndef NDEBUG
DenseSet<Register> PhiRegisters;
#endif
+ Register ExecReg;
+ unsigned MovOp;
+ unsigned AndOp;
+ unsigned OrOp;
+ unsigned XorOp;
+ unsigned AndN2Op;
+ unsigned OrN2Op;
+ unsigned CSelectOp;
+ unsigned CmpLGOp;
+
public:
bool lowerPhis();
bool isConstantLaneMask(Register Reg, bool &Val) const;
More information about the llvm-commits
mailing list