[clang] [llvm] [NVPTX] Add intrinsics for cvt .f6x2 and .ue8m0x2 variants (PR #134345)
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 4 13:58:46 PDT 2025
================
@@ -1021,6 +1036,174 @@ __device__ void nvvm_cvt_sm89() {
__nvvm_e5m2x2_to_f16x2_rn(0x4c4c);
// CHECK_PTX81_SM89: call <2 x half> @llvm.nvvm.e5m2x2.to.f16x2.rn.relu(i16 19532)
__nvvm_e5m2x2_to_f16x2_rn_relu(0x4c4c);
+
+ // CHECK_PTX81_SM89: call i32 @llvm.nvvm.f2tf32.rna.satfinite(float 1.000000e+00)
+ __nvvm_f2tf32_rna_satfinite(1.0f);
+#endif
+ // CHECK: ret void
+}
+
+// CHECK-LABEL: nvvm_cvt_sm90
+__device__ void nvvm_cvt_sm90() {
+#if (PTX >= 78) && (__CUDA_ARCH__ >= 900)
+ // CHECK_PTX78_SM90: call i32 @llvm.nvvm.f2tf32.rn(float 1.000000e+00)
+ __nvvm_f2tf32_rn(1.0f);
+ // CHECK_PTX78_SM90: call i32 @llvm.nvvm.f2tf32.rn.relu(float 1.000000e+00)
+ __nvvm_f2tf32_rn_relu(1.0f);
+ // CHECK_PTX78_SM90: call i32 @llvm.nvvm.f2tf32.rz(float 1.000000e+00)
+ __nvvm_f2tf32_rz(1.0f);
+ // CHECK_PTX78_SM90: call i32 @llvm.nvvm.f2tf32.rz.relu(float 1.000000e+00)
+ __nvvm_f2tf32_rz_relu(1.0f);
+#endif
+ // CHECK: ret void
+}
+
+// CHECK-LABEL: nvvm_cvt_sm100
+__device__ void nvvm_cvt_sm100() {
+#if (PTX >= 86) && (__CUDA_ARCH__ >= 1000)
+ // CHECK_PTX86_SM100: call i32 @llvm.nvvm.f2tf32.rn.satfinite(float 1.000000e+00)
+ __nvvm_f2tf32_rn_satfinite(1.0f);
+ // CHECK_PTX86_SM100: call i32 @llvm.nvvm.f2tf32.rn.relu.satfinite(float 1.000000e+00)
+ __nvvm_f2tf32_rn_relu_satfinite(1.0f);
+ // CHECK_PTX86_SM100: call i32 @llvm.nvvm.f2tf32.rz.satfinite(float 1.000000e+00)
+ __nvvm_f2tf32_rz_satfinite(1.0f);
+ // CHECK_PTX86_SM100: call i32 @llvm.nvvm.f2tf32.rz.relu.satfinite(float 1.000000e+00)
+ __nvvm_f2tf32_rz_relu_satfinite(1.0f);
+#endif
+ // CHECK: ret void
+}
+
+// CHECK-LABEL: nvvm_cvt_sm100a
+__device__ void nvvm_cvt_sm100a() {
+#if (PTX >= 86) && __CUDA_ARCH_FEAT_SM100_ALL
----------------
Artem-B wrote:
Do we really need to replicate this function three times?
Can we just keep a single instance and guard it with:
```
#if __CUDA_ARCH_FEAT_SM100_ALL || __CUDA_ARCH_FEAT_SM101_ALL || __CUDA_ARCH_FEAT_SM120_ALL
```
https://github.com/llvm/llvm-project/pull/134345
More information about the llvm-commits mailing list