[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: RBLegalize (PR #112864)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Oct 19 08:06:18 PDT 2024
================
@@ -107,3 +107,183 @@ void IntrinsicLaneMaskAnalyzer::findLCSSAPhi(Register Reg) {
       S32S64LaneMask.insert(LCSSAPhi.getOperand(0).getReg());
   }
 }
+
+MachineInstrBuilder AMDGPU::buildReadAnyLaneB32(MachineIRBuilder &B,
+                                                const DstOp &SgprDst,
+                                                const SrcOp &VgprSrc,
+                                                const RegisterBankInfo &RBI) {
+  auto RFL = B.buildInstr(AMDGPU::G_READANYLANE, {SgprDst}, {VgprSrc});
+  Register Dst = RFL->getOperand(0).getReg();
+  Register Src = RFL->getOperand(1).getReg();
+  MachineRegisterInfo &MRI = *B.getMRI();
+  if (!MRI.getRegBankOrNull(Dst))
+    MRI.setRegBank(Dst, RBI.getRegBank(SGPRRegBankID));
+  if (!MRI.getRegBankOrNull(Src))
+    MRI.setRegBank(Src, RBI.getRegBank(VGPRRegBankID));
+  return RFL;
+}
+
+MachineInstrBuilder
+AMDGPU::buildReadAnyLaneSequenceOfB32(MachineIRBuilder &B, const DstOp &SgprDst,
+                                      const SrcOp &VgprSrc, LLT B32Ty,
+                                      const RegisterBankInfo &RBI) {
+  MachineRegisterInfo &MRI = *B.getMRI();
+  SmallVector<Register, 8> SgprDstParts;
+  auto Unmerge = B.buildUnmerge(B32Ty, VgprSrc);
+  for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
+    SgprDstParts.push_back(
+        buildReadAnyLaneB32(B, B32Ty, Unmerge.getReg(i), RBI).getReg(0));
+  }
+
+  auto Merge = B.buildMergeLikeInstr(SgprDst, SgprDstParts);
+  MRI.setRegBank(Merge.getReg(0), RBI.getRegBank(AMDGPU::SGPRRegBankID));
+  return Merge;
+}
+
+MachineInstrBuilder
+AMDGPU::buildReadAnyLaneSequenceOfS64(MachineIRBuilder &B, const DstOp &SgprDst,
+                                      const SrcOp &VgprSrc,
+                                      const RegisterBankInfo &RBI) {
+  LLT S32 = LLT::scalar(32);
+  LLT S64 = LLT::scalar(64);
+  MachineRegisterInfo &MRI = *B.getMRI();
+  SmallVector<Register, 8> SgprDstParts;
+  auto Unmerge = B.buildUnmerge(S64, VgprSrc);
+
+  for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
+    MRI.setRegBank(Unmerge.getReg(i), RBI.getRegBank(AMDGPU::VGPRRegBankID));
+    auto Unmerge64 = B.buildUnmerge(S32, Unmerge.getReg(i));
+    SmallVector<Register, 2> Unmerge64Parts;
+    Unmerge64Parts.push_back(
+        buildReadAnyLaneB32(B, S32, Unmerge64.getReg(0), RBI).getReg(0));
+    Unmerge64Parts.push_back(
+        buildReadAnyLaneB32(B, S32, Unmerge64.getReg(1), RBI).getReg(0));
+    Register MergeReg = B.buildMergeLikeInstr(S64, Unmerge64Parts).getReg(0);
+    MRI.setRegBank(MergeReg, RBI.getRegBank(AMDGPU::SGPRRegBankID));
+    SgprDstParts.push_back(MergeReg);
+  }
+
+  auto Merge = B.buildMergeLikeInstr(SgprDst, SgprDstParts);
+  MRI.setRegBank(Merge.getReg(0), RBI.getRegBank(AMDGPU::SGPRRegBankID));
+  return Merge;
+}
+
+MachineInstrBuilder AMDGPU::buildReadAnyLane(MachineIRBuilder &B,
+                                             const DstOp &SgprDst,
+                                             const SrcOp &VgprSrc,
+                                             const RegisterBankInfo &RBI) {
+  MachineRegisterInfo &MRI = *B.getMRI();
+  LLT S16 = LLT::scalar(16);
+  LLT S32 = LLT::scalar(32);
+  LLT S64 = LLT::scalar(64);
+  LLT S256 = LLT::scalar(256);
+  LLT V2S16 = LLT::fixed_vector(2, 16);
+  LLT Ty = SgprDst.getLLTTy(MRI);
+
+  if (Ty == S16) {
+    return B.buildTrunc(
+        SgprDst, buildReadAnyLaneB32(B, S32, B.buildAnyExt(S32, VgprSrc), RBI));
+  }
+
+  if (Ty == S32 || Ty == V2S16 ||
+      (Ty.isPointer() && Ty.getSizeInBits() == 32)) {
+    return buildReadAnyLaneB32(B, SgprDst, VgprSrc, RBI);
+  }
+
+  if (Ty == S64 || Ty == S256 || (Ty.isPointer() && Ty.getSizeInBits() == 64) ||
+      (Ty.isVector() && Ty.getElementType() == S32)) {
+    return buildReadAnyLaneSequenceOfB32(B, SgprDst, VgprSrc, S32, RBI);
+  }
+
+  if (Ty.isVector() && Ty.getElementType() == S16) {
+    return buildReadAnyLaneSequenceOfB32(B, SgprDst, VgprSrc, V2S16, RBI);
+  }
+
+  if (Ty.isVector() && Ty.getElementType() == S64) {
+    return buildReadAnyLaneSequenceOfS64(B, SgprDst, VgprSrc, RBI);
+  }
+
+  llvm_unreachable("Type not supported");
+}
+
+void AMDGPU::buildReadAnyLaneDst(MachineIRBuilder &B, MachineInstr &MI,
+                                 const RegisterBankInfo &RBI) {
+  MachineRegisterInfo &MRI = *B.getMRI();
+  Register Dst = MI.getOperand(0).getReg();
+  const RegisterBank *DstBank = MRI.getRegBankOrNull(Dst);
+  if (DstBank != &RBI.getRegBank(AMDGPU::SGPRRegBankID))
+    return;
+
+  Register VgprDst = MRI.createGenericVirtualRegister(MRI.getType(Dst));
+  MRI.setRegBank(VgprDst, RBI.getRegBank(AMDGPU::VGPRRegBankID));
+
+  MI.getOperand(0).setReg(VgprDst);
+  MachineBasicBlock *MBB = MI.getParent();
+  B.setInsertPt(*MBB, std::next(MI.getIterator()));
+  // readAnyLane VgprDst into Dst after MI.
+  buildReadAnyLane(B, Dst, VgprDst, RBI);
+  return;
+}
+
+bool AMDGPU::isLaneMask(Register Reg, MachineRegisterInfo &MRI,
+                        const SIRegisterInfo *TRI) {
+  const RegisterBank *RB = MRI.getRegBankOrNull(Reg);
+  if (RB && RB->getID() == VCCRegBankID)
+    return true;
+
+  const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
+  if (RC && TRI->isSGPRClass(RC) && MRI.getType(Reg) == LLT::scalar(1))
+    return true;
+
+  return false;
+}
+
+bool AMDGPU::isSgprRB(Register Reg, MachineRegisterInfo &MRI) {
----------------
arsenm wrote:
Avoid the RB abbreviation. This is simple enough that I would just expect it to be done inline in the local pass, or in the AMDGPURegBank header.
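
A minimal sketch of the inline form being suggested, assuming the helper only compares the register bank recorded in MachineRegisterInfo against the SGPR bank (its body is truncated in the quoted hunk); the surrounding names here are illustrative, not from the patch:

  // Hypothetical use site inside the pass; mirrors the check pattern used by
  // isLaneMask above rather than the (unshown) body of the helper.
  const RegisterBank *Bank = MRI.getRegBankOrNull(Reg);
  if (Bank && Bank->getID() == AMDGPU::SGPRRegBankID) {
    // ... handling specific to SGPR-bank registers ...
  }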
https://github.com/llvm/llvm-project/pull/112864