[clang] [llvm] [AMDGPU][WIP] Add support for i64/f64 readlane, writelane and readfirstlane operations. (PR #89217)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Thu May 2 11:09:27 PDT 2024
================
@@ -6091,6 +5982,70 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
+ SelectionDAG &DAG) {
+ auto VT = N->getValueType(0);
+ unsigned ValSize = VT.getSizeInBits();
+ unsigned IntrinsicID = N->getConstantOperandVal(0);
+ SDValue Src0 = N->getOperand(1);
+ SDLoc SL(N);
+ MVT IntVT = MVT::getIntegerVT(ValSize);
+
+ auto createLaneOp = [&](SDValue &Src0, SDValue &Src1, SDValue &Src2,
+ MVT VT) -> SDValue {
+ return (Src2.getNode()
+ ? DAG.getNode(AMDGPUISD::WRITELANE, SL, VT, {Src0, Src1, Src2})
+ : Src1.getNode()
+ ? DAG.getNode(AMDGPUISD::READLANE, SL, VT, {Src0, Src1})
+ : DAG.getNode(AMDGPUISD::READFIRSTLANE, SL, VT, {Src0}));
+ };
+
+ SDValue Src1, Src2, Src0Valid, Src2Valid;
+ if (IntrinsicID == Intrinsic::amdgcn_readlane ||
+ IntrinsicID == Intrinsic::amdgcn_writelane) {
+ Src1 = N->getOperand(2);
+ if (IntrinsicID == Intrinsic::amdgcn_writelane)
+ Src2 = N->getOperand(3);
+ }
+
+ if (ValSize == 32) {
+ if (VT == MVT::i32)
+ // Already legal
+ return SDValue();
+ Src0Valid = DAG.getBitcast(IntVT, Src0);
+ if (Src2.getNode())
+ Src2Valid = DAG.getBitcast(IntVT, Src2);
+ auto LaneOp = createLaneOp(Src0Valid, Src1, Src2Valid, MVT::i32);
+ return DAG.getBitcast(VT, LaneOp);
+ }
+
+ if (ValSize < 32) {
+ auto InitBitCast = DAG.getBitcast(IntVT, Src0);
+ Src0Valid = DAG.getAnyExtOrTrunc(InitBitCast, SL, MVT::i32);
+ if (Src2.getNode()) {
+ auto Src2Cast = DAG.getBitcast(IntVT, Src2);
+ Src2Valid = DAG.getAnyExtOrTrunc(Src2Cast, SL, MVT::i32);
+ }
+ auto LaneOp = createLaneOp(Src0Valid, Src1, Src2Valid, MVT::i32);
+ auto Trunc = DAG.getAnyExtOrTrunc(LaneOp, SL, IntVT);
+ return DAG.getBitcast(VT, Trunc);
+ }
+
+ if ((ValSize % 32) == 0) {
+ MVT VecVT = MVT::getVectorVT(MVT::i32, ValSize / 32);
+ Src0Valid = DAG.getBitcast(VecVT, Src0);
+
+ if (Src2.getNode())
+ Src2Valid = DAG.getBitcast(VecVT, Src2);
+
+ auto LaneOp = createLaneOp(Src0Valid, Src1, Src2Valid, VecVT);
+ auto UnrolledLaneOp = DAG.UnrollVectorOp(LaneOp.getNode());
----------------
arsenm wrote:
No autos: please spell out the types explicitly instead of using `auto` here.
https://github.com/llvm/llvm-project/pull/89217
More information about the cfe-commits mailing list