[llvm] AMDGPU/GlobalISel: Uniformity info based regbankselect (PR #73684)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 28 22:57:43 PST 2024
================
@@ -700,58 +700,105 @@ static LLT getHalfSizedType(LLT Ty) {
// Build one or more V_READFIRSTLANE_B32 instructions to move the given vector
// source value into a scalar register.
-Register AMDGPURegisterBankInfo::buildReadFirstLane(MachineIRBuilder &B,
- MachineRegisterInfo &MRI,
- Register Src) const {
+Register AMDGPURegisterBankInfo::buildReadFirstLaneSrc(MachineIRBuilder &B,
+ Register Src) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
LLT Ty = MRI.getType(Src);
const RegisterBank *Bank = getRegBank(Src, MRI, *TRI);
- if (Bank == &AMDGPU::SGPRRegBank)
- return Src;
-
- unsigned Bits = Ty.getSizeInBits();
- assert(Bits % 32 == 0);
-
if (Bank != &AMDGPU::VGPRRegBank) {
// We need to copy from AGPR to VGPR
Src = B.buildCopy(Ty, Src).getReg(0);
MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
}
+ Register Dst = MRI.createGenericVirtualRegister(Ty);
+ MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
+ buildReadFirstLaneForType(B, Dst, Src);
+ return Dst;
+}
+
+// Create new vgpr destination register for MI then move it to current
+// MI's sgpr destination using one or more V_READFIRSTLANE_B32 instructions.
+void AMDGPURegisterBankInfo::buildReadFirstLaneDst(MachineIRBuilder &B,
+ MachineInstr &MI) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ Register Dst = MI.getOperand(0).getReg();
+ const RegisterBank *DstBank = getRegBank(Dst, MRI, *TRI);
+ if (DstBank != &AMDGPU::SGPRRegBank)
+ return;
+
+ Register VgprDst = MRI.createGenericVirtualRegister(MRI.getType(Dst));
+ MRI.setRegBank(VgprDst, AMDGPU::VGPRRegBank);
+
+ MI.getOperand(0).setReg(VgprDst);
+ MachineBasicBlock *MBB = MI.getParent();
+ B.setInsertPt(*MBB, std::next(MI.getIterator()));
+ // readFirstLane VgprDst into Dst after MI.
+ return buildReadFirstLaneForType(B, Dst, VgprDst);
+}
+
+void AMDGPURegisterBankInfo::buildReadFirstLaneB32(MachineIRBuilder &B,
+ Register SgprDst,
+ Register VgprSrc) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {SgprDst}, {VgprSrc});
+ MRI.setRegClass(VgprSrc, &AMDGPU::VGPR_32RegClass);
+ MRI.setRegClass(SgprDst, &AMDGPU::SReg_32RegClass);
+}
+
+void AMDGPURegisterBankInfo::buildReadFirstLaneSequenceOfB32(
+ MachineIRBuilder &B, Register SgprDst, Register VgprSrc,
+ unsigned NumElts) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
LLT S32 = LLT::scalar(32);
- unsigned NumParts = Bits / 32;
- SmallVector<Register, 8> SrcParts;
- SmallVector<Register, 8> DstParts;
+ SmallVector<Register, 8> VgprSrcParts;
+ SmallVector<Register, 8> SgprDstParts;
- if (Bits == 32) {
- SrcParts.push_back(Src);
- } else {
- auto Unmerge = B.buildUnmerge(S32, Src);
- for (unsigned i = 0; i < NumParts; ++i)
- SrcParts.push_back(Unmerge.getReg(i));
+ for (unsigned i = 0; i < NumElts; ++i) {
+ VgprSrcParts.push_back(MRI.createGenericVirtualRegister(S32));
+ SgprDstParts.push_back(MRI.createGenericVirtualRegister(S32));
}
- for (unsigned i = 0; i < NumParts; ++i) {
- Register SrcPart = SrcParts[i];
- Register DstPart = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- MRI.setType(DstPart, NumParts == 1 ? Ty : S32);
-
- const TargetRegisterClass *Constrained =
- constrainGenericRegister(SrcPart, AMDGPU::VGPR_32RegClass, MRI);
- (void)Constrained;
- assert(Constrained && "Failed to constrain readfirstlane src reg");
+ B.buildUnmerge(VgprSrcParts, VgprSrc);
+ for (unsigned i = 0; i < NumElts; ++i)
+ buildReadFirstLaneB32(B, SgprDstParts[i], VgprSrcParts[i]);
- B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
+ B.buildMergeLikeInstr(SgprDst, SgprDstParts);
+}
- DstParts.push_back(DstPart);
+void AMDGPURegisterBankInfo::buildReadFirstLaneForType(MachineIRBuilder &B,
+ Register SgprDst,
+ Register VgprSrc) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ LLT S16 = LLT::scalar(16);
+ LLT S32 = LLT::scalar(32);
+ LLT S64 = LLT::scalar(64);
+ LLT Ty = MRI.getType(SgprDst);
+
+ if (Ty == S16) {
+ Register VgprSrc32 = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegBank(VgprSrc32, AMDGPU::VGPRRegBank);
+ Register SgprDst32 = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegBank(SgprDst32, AMDGPU::SGPRRegBank);
+
+ B.buildAnyExt(VgprSrc32, VgprSrc);
----------------
arsenm wrote:
Fold the register creation into the build calls; the register bank can be set afterwards.
https://github.com/llvm/llvm-project/pull/73684
More information about the llvm-commits mailing list