[clang] [llvm] [AMDGPU][WIP] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (PR #89217)
Jay Foad via cfe-commits
cfe-commits at lists.llvm.org
Thu May 9 03:55:12 PDT 2024
================
@@ -5386,6 +5386,130 @@ bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
+ MachineInstr &MI,
+ Intrinsic::ID IID) const {
+
+ MachineIRBuilder &B = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *B.getMRI();
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(2).getReg();
+
+ auto createLaneOp = [&](Register &Src0, Register &Src1,
+ Register &Src2) -> Register {
+ auto LaneOpDst = B.buildIntrinsic(IID, {S32}).addUse(Src0);
+ if (Src2.isValid())
+ return (LaneOpDst.addUse(Src1).addUse(Src2)).getReg(0);
+ if (Src1.isValid())
+ return (LaneOpDst.addUse(Src1)).getReg(0);
+ return LaneOpDst.getReg(0);
+ };
+
+ Register Src1, Src2, Src0Valid, Src2Valid;
+ if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane) {
+ Src1 = MI.getOperand(3).getReg();
+ if (IID == Intrinsic::amdgcn_writelane) {
+ Src2 = MI.getOperand(4).getReg();
+ }
+ }
+
+ LLT Ty = MRI.getType(DstReg);
+ unsigned Size = Ty.getSizeInBits();
+
+ if (Size == 32) {
+ if (Ty.isScalar())
+ // Already legal
+ return true;
+
+ Register Src0Valid = B.buildBitcast(S32, Src0).getReg(0);
+ if (Src2.isValid())
+ Src2Valid = B.buildBitcast(S32, Src2).getReg(0);
+ Register LaneOp = createLaneOp(Src0Valid, Src1, Src2Valid);
+ B.buildBitcast(DstReg, LaneOp);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (Size < 32) {
+ Register Src0Cast = MRI.getType(Src0).isScalar()
+ ? Src0
+ : B.buildBitcast(LLT::scalar(Size), Src0).getReg(0);
+ Src0Valid = B.buildAnyExt(S32, Src0Cast).getReg(0);
+
+ if (Src2.isValid()) {
+ Register Src2Cast =
+ MRI.getType(Src2).isScalar()
+ ? Src2
+ : B.buildBitcast(LLT::scalar(Size), Src2).getReg(0);
+ Src2Valid = B.buildAnyExt(LLT::scalar(32), Src2Cast).getReg(0);
+ }
+ Register LaneOp = createLaneOp(Src0Valid, Src1, Src2Valid);
+ if (Ty.isScalar())
+ B.buildTrunc(DstReg, LaneOp);
+ else {
+ auto Trunc = B.buildTrunc(LLT::scalar(Size), LaneOp);
+ B.buildBitcast(DstReg, Trunc);
+ }
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if ((Size % 32) == 0) {
+ SmallVector<Register, 2> PartialRes;
+ unsigned NumParts = Size / 32;
+ auto Src0Parts = B.buildUnmerge(S32, Src0);
+
+ switch (IID) {
+ case Intrinsic::amdgcn_readlane: {
+ Register Src1 = MI.getOperand(3).getReg();
+ for (unsigned i = 0; i < NumParts; ++i)
+ PartialRes.push_back(
+ (B.buildIntrinsic(Intrinsic::amdgcn_readlane, {S32})
+ .addUse(Src0Parts.getReg(i))
+ .addUse(Src1))
+ .getReg(0));
----------------
jayfoad wrote:
Yes, separate patch
https://github.com/llvm/llvm-project/pull/89217
More information about the cfe-commits mailing list