[flang-commits] [flang] [flang][cuda] Update some bind name to fast version and add __sincosf (PR #153744)

Thu Aug 14 21:55:09 PDT 2025

https://github.com/clementval created https://github.com/llvm/llvm-project/pull/153744

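Use the fast versions (`__nv_fast_*`) in the bind names of the single-precision fast-math intrinsics (`__cosf`, `__exp10f`, `__expf`, `__log10f`, `__log2f`, `__logf`, `__powf`, `__sinf`, `__tanf`) and reorder these interfaces so they are grouped together in `cudadevice.f90`. Add the missing `__sincosf` interface.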

>From 69136036415666ee1bfab30af90a5e2d91b5c3dc Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 14 Aug 2025 21:54:10 -0700
Subject: [PATCH] [flang][cuda] Update some bind name to fast version and add
 __sincosf

---
 flang/module/cudadevice.f90                | 98 ++++++++++++----------
 flang/test/Lower/CUDA/cuda-device-proc.cuf |  4 +-
 flang/test/Lower/CUDA/cuda-libdevice.cuf   | 22 +++--
 3 files changed, 70 insertions(+), 54 deletions(-)

diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index ffc3a3b170ca6..58558f7f5cedf 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -394,20 +394,70 @@ attributes(device) subroutine sincospi(x, y, z) bind(c,name='__nv_sincospi')
   end interface
 
   interface
-    attributes(device) real(4) function __cosf(x) bind(c, name='__nv_cosf')
+    attributes(device) real(4) function __cosf(x) bind(c, name='__nv_fast_cosf')
       real(4), value :: x
     end function
   end interface
 
+  interface __exp10f
+    attributes(device) real function __exp10f(r) bind(c, name='__nv_fast_exp10f')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __expf
+    attributes(device) real function __expf(r) bind(c, name='__nv_fast_expf')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __log10f
+    attributes(device) real function __log10f(r) bind(c, name='__nv_fast_log10f')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __log2f
+    attributes(device) real function __log2f(r) bind(c, name='__nv_fast_log2f')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface __logf
+    attributes(device) real function __logf(r) bind(c, name='__nv_fast_logf')
+      !dir$ ignore_tkr (d) r
+      real, value :: r
+    end function
+  end interface
+
+  interface
+    attributes(device) real(4) function __powf(x,y) bind(c, name='__nv_fast_powf')
+      !dir$ ignore_tkr (d) x, y
+      real(4), value :: x, y
+    end function
+  end interface
+
+  interface __sincosf
+    attributes(device) subroutine __sincosf(r, s, c) bind(c, name='__nv_fast_sincosf')
+      !dir$ ignore_tkr (d) r, (d) s, (d) c
+      real, value :: r
+      real :: s, c
+    end subroutine
+  end interface
+
   interface __sinf
-    attributes(device) real function __sinf(r) bind(c, name='__nv_sinf')
+    attributes(device) real function __sinf(r) bind(c, name='__nv_fast_sinf')
       !dir$ ignore_tkr (d) r
       real, value :: r
     end function
   end interface
 
   interface __tanf
-    attributes(device) real function __tanf(r) bind(c, name='__nv_tanf')
+    attributes(device) real function __tanf(r) bind(c, name='__nv_fast_tanf')
       !dir$ ignore_tkr (d) r
       real, value :: r
     end function
@@ -1043,13 +1093,6 @@ attributes(device) real(8) function sinpi(x) bind(c,name='__nv_sinpi')
     end function
   end interface
 
-  interface
-    attributes(device) real(4) function __powf(x,y) bind(c, name='__nv_powf')
-      !dir$ ignore_tkr (d) x, y
-      real(4), value :: x, y
-    end function
-  end interface
-
   interface __brev
     attributes(device) integer function __brev(i) bind(c, name='__nv_brev')
       !dir$ ignore_tkr (d) i
@@ -1909,41 +1952,6 @@ attributes(device,host) logical function on_device() bind(c)
     end function
   end interface
 
-  interface __log2f
-    attributes(device) real function __log2f(r) bind(c, name='__nv_log2f')
-      !dir$ ignore_tkr (d) r
-      real, value :: r
-    end function
-  end interface
-
-  interface __log10f
-    attributes(device) real function __log10f(r) bind(c, name='__nv_log10f')
-      !dir$ ignore_tkr (d) r
-      real, value :: r
-    end function
-  end interface
-
-  interface __logf
-    attributes(device) real function __logf(r) bind(c, name='__nv_logf')
-      !dir$ ignore_tkr (d) r
-      real, value :: r
-    end function
-  end interface
-
-  interface __expf
-    attributes(device) real function __expf(r) bind(c, name='__nv_expf')
-      !dir$ ignore_tkr (d) r
-      real, value :: r
-    end function
-  end interface
-
-  interface __exp10f
-    attributes(device) real function __exp10f(r) bind(c, name='__nv_exp10f')
-      !dir$ ignore_tkr (d) r
-      real, value :: r
-    end function
-  end interface
-
 contains
 
   attributes(device) subroutine syncthreads()
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index a6e8c69b2e52e..5e1f6b66d1d53 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -140,7 +140,7 @@ end
 ! CHECK: %{{.*}} = fir.call @__nv_brevll(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> i64
 ! CHECK: %{{.*}} = fir.call @__nv_clz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> i32
 ! CHECK: %{{.*}} = fir.call @__nv_clzll(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> i32
-! CHECK: %{{.*}} = fir.call @__nv_cosf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_cosf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
 ! CHECK: %{{.*}} = fir.call @__nv_ddiv_rn(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64, f64) -> f64
 ! CHECK: %{{.*}} = fir.call @__nv_ddiv_rz(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64, f64) -> f64
 ! CHECK: %{{.*}} = fir.call @__nv_ddiv_ru(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64, f64) -> f64
@@ -159,7 +159,7 @@ end
 ! CHECK: %{{.*}} = fir.call @__nv_double2uint_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i32
 ! CHECK: %{{.*}} = fir.call @__nv_mul24(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32, i32) -> i32
 ! CHECK: %{{.*}} = fir.call @__nv_umul24(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32, i32) -> i32
-! CHECK: %{{.*}} = fir.call @__nv_powf(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32, f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_powf(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32, f32) -> f32
 ! CHECK: %{{.*}} = fir.call @__nv_ull2double_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
 ! CHECK: %{{.*}} = fir.call @__nv_ull2double_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
 ! CHECK: %{{.*}} = fir.call @__nv_ull2double_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
diff --git a/flang/test/Lower/CUDA/cuda-libdevice.cuf b/flang/test/Lower/CUDA/cuda-libdevice.cuf
index f9c5dcc5fc4c3..f57a51e3abd5c 100644
--- a/flang/test/Lower/CUDA/cuda-libdevice.cuf
+++ b/flang/test/Lower/CUDA/cuda-libdevice.cuf
@@ -83,9 +83,17 @@ attributes(global) subroutine test_log()
 end subroutine
 
 ! CHECK-LABEL: _QPtest_log
-! CHECK: %{{.*}} = fir.call @__nv_logf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
-! CHECK: %{{.*}} = fir.call @__nv_log2f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
-! CHECK: %{{.*}} = fir.call @__nv_log10f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_logf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_log2f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_log10f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+
+attributes(global) subroutine test_sincosf()
+  real :: r, s, c
+  call __sincosf(r, s, c)
+end subroutine
+
+! CHECK-LABEL: _QPtest_sincosf
+! CHECK: fir.call @__nv_fast_sincosf(%{{.*}}, %{{.*}}#0, %{{.*}}#0) proc_attrs<bind_c> fastmath<contract> : (f32, !fir.ref<f32>, !fir.ref<f32>) -> () 
 
 attributes(global) subroutine test_sinf()
   real :: res
@@ -94,7 +102,7 @@ attributes(global) subroutine test_sinf()
 end subroutine
 
 ! CHECK-LABEL: _QPtest_sinf
-! CHECK: %{{.*}} = fir.call @__nv_sinf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_sinf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
 
 attributes(global) subroutine test_tanf()
   real :: res
@@ -103,7 +111,7 @@ attributes(global) subroutine test_tanf()
 end subroutine
 
 ! CHECK-LABEL: _QPtest_tanf
-! CHECK: %{{.*}} = fir.call @__nv_tanf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_tanf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
 
 attributes(global) subroutine test_exp()
   real :: res
@@ -113,8 +121,8 @@ attributes(global) subroutine test_exp()
 end subroutine
 
 ! CHECK-LABEL: _QPtest_exp
-! CHECK: %{{.*}} = fir.call @__nv_expf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
-! CHECK: %{{.*}} = fir.call @__nv_exp10f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_expf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_exp10f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
 
 attributes(global) subroutine test_double2ll_rX()
   integer(8) :: res