[clang] [llvm] [clang][NVPTX] Add intrinsics and builtins for CVT RS rounding mode (PR #160494)
Durgadoss R via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 24 08:31:09 PDT 2025
================
@@ -2839,6 +2845,69 @@ static SDValue LowerClusterLaunchControlQueryCancel(SDValue Op,
{TryCancelResponse0, TryCancelResponse1});
}
+bool isCvtRSReluIntrinsic(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_relu_satfinite:
+ case Intrinsic::nvvm_f32x4_to_e5m2x4_rs_relu_satfinite:
+ case Intrinsic::nvvm_f32x4_to_e2m3x4_rs_relu_satfinite:
+ case Intrinsic::nvvm_f32x4_to_e3m2x4_rs_relu_satfinite:
+ case Intrinsic::nvvm_f32x4_to_e2m1x4_rs_relu_satfinite:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static SDValue lowerCvtRSIntrinsics(SDValue Op, SelectionDAG &DAG) {
+ SDNode *N = Op.getNode();
+ SDLoc DL(N);
+ SDValue F32Vec = N->getOperand(1);
+ SDValue RBits = N->getOperand(2);
+
+ unsigned IntrinsicID = N->getConstantOperandVal(0);
+
+ uint32_t CvtModeFlag = NVPTX::PTXCvtMode::CvtMode::RS;
+ if (isCvtRSReluIntrinsic(IntrinsicID))
+ CvtModeFlag |= NVPTX::PTXCvtMode::CvtMode::RELU_FLAG;
+
+ SDValue Float1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, F32Vec,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue Float2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, F32Vec,
+ DAG.getIntPtrConstant(1, DL));
+ SDValue Float3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, F32Vec,
+ DAG.getIntPtrConstant(2, DL));
+ SDValue Float4 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, F32Vec,
+ DAG.getIntPtrConstant(3, DL));
+
+ auto OpSignature =
+ [&]() -> std::pair<NVPTXISD::NodeType, MVT::SimpleValueType> {
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_relu_satfinite:
+ case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_satfinite:
+ return {NVPTXISD::CVT_E4M3X4_F32X4_RS_SF, MVT::v4i8};
+ case Intrinsic::nvvm_f32x4_to_e5m2x4_rs_relu_satfinite:
+ case Intrinsic::nvvm_f32x4_to_e5m2x4_rs_satfinite:
+ return {NVPTXISD::CVT_E5M2X4_F32X4_RS_SF, MVT::v4i8};
+ case Intrinsic::nvvm_f32x4_to_e2m3x4_rs_relu_satfinite:
+ case Intrinsic::nvvm_f32x4_to_e2m3x4_rs_satfinite:
+ return {NVPTXISD::CVT_E2M3X4_F32X4_RS_SF, MVT::v4i8};
+ case Intrinsic::nvvm_f32x4_to_e3m2x4_rs_relu_satfinite:
+ case Intrinsic::nvvm_f32x4_to_e3m2x4_rs_satfinite:
+ return {NVPTXISD::CVT_E3M2X4_F32X4_RS_SF, MVT::v4i8};
+ case Intrinsic::nvvm_f32x4_to_e2m1x4_rs_relu_satfinite:
+ case Intrinsic::nvvm_f32x4_to_e2m1x4_rs_satfinite:
+ return {NVPTXISD::CVT_E2M1X4_F32X4_RS_SF, MVT::i16};
----------------
durga4github wrote:
Now that we are swicthing over these anyway here, can we use this same switch-case to set the relu-flag also?
(and avoid a separate function for it)
https://github.com/llvm/llvm-project/pull/160494
More information about the llvm-commits
mailing list