[clang] [llvm] [clang][NVPTX] Add intrinsics and builtins for CVT RS rounding mode (PR #160494)

Srinivasa Ravi via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 25 05:26:32 PDT 2025


================
@@ -2839,6 +2845,57 @@ static SDValue LowerClusterLaunchControlQueryCancel(SDValue Op,
                      {TryCancelResponse0, TryCancelResponse1});
 }
 
+static SDValue lowerCvtRSIntrinsics(SDValue Op, SelectionDAG &DAG) {
+  SDNode *N = Op.getNode();
+  SDLoc DL(N);
+  SDValue F32Vec = N->getOperand(1);
+  SDValue RBits = N->getOperand(2);
+
+  unsigned IntrinsicID = N->getConstantOperandVal(0);
+
+  uint32_t CvtModeFlag = NVPTX::PTXCvtMode::CvtMode::RS;
+
+  // Extract the 4 float elements from the vector
+  SmallVector<SDValue, 6> Ops;
+  for (unsigned i = 0; i < 4; ++i) {
+    Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, F32Vec,
+                              DAG.getIntPtrConstant(i, DL)));
+  }
+
+  auto OpSignature =
+      [&]() -> std::pair<NVPTXISD::NodeType, MVT::SimpleValueType> {
+    switch (IntrinsicID) {
+    case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_relu_satfinite:
+      CvtModeFlag |= NVPTX::PTXCvtMode::CvtMode::RELU_FLAG;
+    case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_satfinite:
+      return {NVPTXISD::CVT_E4M3X4_F32X4_RS_SF, MVT::v4i8};
----------------
Wolfram70 wrote:

That seems better. Changed to a tuple in the latest revision, thanks!

https://github.com/llvm/llvm-project/pull/160494


More information about the llvm-commits mailing list