[llvm] [amdgpu] Add llvm.amdgcn.init.whole.wave intrinsic (PR #105822)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 27 11:56:44 PDT 2024
================
@@ -15671,6 +15671,133 @@ static int getAlignedAGPRClassID(unsigned UnalignedClassID) {
}
}
+static void removeInitWholeWaveBranch(MachineFunction &MF,
+ MachineRegisterInfo &MRI,
+ const SIInstrInfo *TII) {
+ // Remove SI_INIT_WHOLE_WAVE and the following SI_IF/END_CF and instead set
+ // EXEC to -1 at SI_END_CF.
+ auto IWWIt = find_if(MF.begin()->instrs(), [](const MachineInstr &MI) {
+ return MI.getOpcode() == AMDGPU::SI_INIT_WHOLE_WAVE;
+ });
+ if (IWWIt == MF.begin()->instr_end())
+ return; // We've been here before (GISel runs finalizeLowering twice).
+
+ MachineInstr &If = *MRI.use_begin(IWWIt->getOperand(0).getReg())->getParent();
+ assert(If.getOpcode() == AMDGPU::SI_IF &&
+ "Unexpected user for init.whole.wave result");
+ assert(MRI.hasOneUse(IWWIt->getOperand(0).getReg()) &&
+ "Expected simple control flow");
+
+ MachineInstr &EndCf = *MRI.use_begin(If.getOperand(0).getReg())->getParent();
+ MachineBasicBlock *EndBB = EndCf.getParent();
+
+ // Update all the Phis: since we're removing a predecessor, we need to remove
+ // the corresponding pair of operands. However, we can't just drop the value
+ // coming from the 'if' block - that's going to be the value of the inactive
+ // lanes.
+ // %v = phi (%inactive, %if), (%active1, %shader1), ... (%activeN, %shaderN)
+ // should become
+ // %t = phi (%active1, %shader1), ... (%activeN, %shaderN)
+ // %v = v_set_inactive %t, %inactive
+ // Note that usually EndCf will be the first instruction after the phis and as
+ // such will serve as the end of the range when iterating over phis.
+ // Therefore, we shouldn't introduce any new instructions before it.
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ auto AfterEndCf = std::next(EndCf.getIterator());
+ for (auto &Phi : EndBB->phis()) {
+ Register PhiDest = Phi.getOperand(0).getReg();
+ const TargetRegisterClass *PhiRC = MRI.getRegClass(PhiDest);
+
+ Register NewPhiDest = MRI.createVirtualRegister(PhiRC);
+ Phi.getOperand(0).setReg(NewPhiDest);
+
+ unsigned InactiveOpIdx = 0;
+ for (unsigned I = 1; I < Phi.getNumOperands(); I += 2) {
+ if (Phi.getOperand(I + 1).getMBB() == If.getParent()) {
+ InactiveOpIdx = I;
+ break;
+ }
+ }
+ assert(InactiveOpIdx != 0 && "Broken phi?");
+
+ // At this point, the register class could be larger than 32 or 64, so we
+ // might have to use more than one V_SET_INACTIVE instruction.
+ unsigned Size = TRI.getRegSizeInBits(*PhiRC);
+ switch (Size) {
+ case 32:
+ BuildMI(*EndBB, AfterEndCf, Phi.getDebugLoc(),
+ TII->get(AMDGPU::V_SET_INACTIVE_B32), PhiDest)
+ .addReg(NewPhiDest)
+ .add(Phi.getOperand(InactiveOpIdx));
+ break;
+ case 64:
+ BuildMI(*EndBB, AfterEndCf, Phi.getDebugLoc(),
+ TII->get(AMDGPU::V_SET_INACTIVE_B64), PhiDest)
+ .addReg(NewPhiDest)
+ .add(Phi.getOperand(InactiveOpIdx));
+ break;
+ default: {
+ // For each 32-bit subregister of the register at InactiveOpIdx, insert
+ // a COPY to a new register, and a V_SET_INACTIVE_B32 using the
+ // corresponding subregisters of PhiDest and NewPhiDest.
+ // FIXME: There has to be a better way to iterate over this...
----------------
arsenm wrote:
Is this just getSubRegFromChannel?
https://github.com/llvm/llvm-project/pull/105822
More information about the llvm-commits mailing list