[flang-commits] [flang] [flang][cuda] Update some bind name to fast version and add __sincosf (PR #153744)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Thu Aug 14 21:55:09 PDT 2025
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/153744
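Use the fast-math libdevice variants (`__nv_fast_*`) in the bind names of these fast math functions, and move their interfaces so they are grouped together in cudadevice.f90. Add the missing `__sincosf` interface.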
From 69136036415666ee1bfab30af90a5e2d91b5c3dc Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 14 Aug 2025 21:54:10 -0700
Subject: [PATCH] [flang][cuda] Update some bind name to fast version and add
__sincosf
---
flang/module/cudadevice.f90 | 98 ++++++++++++----------
flang/test/Lower/CUDA/cuda-device-proc.cuf | 4 +-
flang/test/Lower/CUDA/cuda-libdevice.cuf | 22 +++--
3 files changed, 70 insertions(+), 54 deletions(-)
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index ffc3a3b170ca6..58558f7f5cedf 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -394,20 +394,70 @@ attributes(device) subroutine sincospi(x, y, z) bind(c,name='__nv_sincospi')
end interface
interface
- attributes(device) real(4) function __cosf(x) bind(c, name='__nv_cosf')
+ attributes(device) real(4) function __cosf(x) bind(c, name='__nv_fast_cosf')
real(4), value :: x
end function
end interface
+ interface __exp10f
+ attributes(device) real function __exp10f(r) bind(c, name='__nv_fast_exp10f')
+ !dir$ ignore_tkr (d) r
+ real, value :: r
+ end function
+ end interface
+
+ interface __expf
+ attributes(device) real function __expf(r) bind(c, name='__nv_fast_expf')
+ !dir$ ignore_tkr (d) r
+ real, value :: r
+ end function
+ end interface
+
+ interface __log10f
+ attributes(device) real function __log10f(r) bind(c, name='__nv_fast_log10f')
+ !dir$ ignore_tkr (d) r
+ real, value :: r
+ end function
+ end interface
+
+ interface __log2f
+ attributes(device) real function __log2f(r) bind(c, name='__nv_fast_log2f')
+ !dir$ ignore_tkr (d) r
+ real, value :: r
+ end function
+ end interface
+
+ interface __logf
+ attributes(device) real function __logf(r) bind(c, name='__nv_fast_logf')
+ !dir$ ignore_tkr (d) r
+ real, value :: r
+ end function
+ end interface
+
+ interface
+ attributes(device) real(4) function __powf(x,y) bind(c, name='__nv_fast_powf')
+ !dir$ ignore_tkr (d) x, y
+ real(4), value :: x, y
+ end function
+ end interface
+
+ interface __sincosf
+ attributes(device) subroutine __sincosf(r, s, c) bind(c, name='__nv_fast_sincosf')
+ !dir$ ignore_tkr (d) r, (d) s, (d) c
+ real, value :: r
+ real :: s, c
+ end subroutine
+ end interface
+
interface __sinf
- attributes(device) real function __sinf(r) bind(c, name='__nv_sinf')
+ attributes(device) real function __sinf(r) bind(c, name='__nv_fast_sinf')
!dir$ ignore_tkr (d) r
real, value :: r
end function
end interface
interface __tanf
- attributes(device) real function __tanf(r) bind(c, name='__nv_tanf')
+ attributes(device) real function __tanf(r) bind(c, name='__nv_fast_tanf')
!dir$ ignore_tkr (d) r
real, value :: r
end function
@@ -1043,13 +1093,6 @@ attributes(device) real(8) function sinpi(x) bind(c,name='__nv_sinpi')
end function
end interface
- interface
- attributes(device) real(4) function __powf(x,y) bind(c, name='__nv_powf')
- !dir$ ignore_tkr (d) x, y
- real(4), value :: x, y
- end function
- end interface
-
interface __brev
attributes(device) integer function __brev(i) bind(c, name='__nv_brev')
!dir$ ignore_tkr (d) i
@@ -1909,41 +1952,6 @@ attributes(device,host) logical function on_device() bind(c)
end function
end interface
- interface __log2f
- attributes(device) real function __log2f(r) bind(c, name='__nv_log2f')
- !dir$ ignore_tkr (d) r
- real, value :: r
- end function
- end interface
-
- interface __log10f
- attributes(device) real function __log10f(r) bind(c, name='__nv_log10f')
- !dir$ ignore_tkr (d) r
- real, value :: r
- end function
- end interface
-
- interface __logf
- attributes(device) real function __logf(r) bind(c, name='__nv_logf')
- !dir$ ignore_tkr (d) r
- real, value :: r
- end function
- end interface
-
- interface __expf
- attributes(device) real function __expf(r) bind(c, name='__nv_expf')
- !dir$ ignore_tkr (d) r
- real, value :: r
- end function
- end interface
-
- interface __exp10f
- attributes(device) real function __exp10f(r) bind(c, name='__nv_exp10f')
- !dir$ ignore_tkr (d) r
- real, value :: r
- end function
- end interface
-
contains
attributes(device) subroutine syncthreads()
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index a6e8c69b2e52e..5e1f6b66d1d53 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -140,7 +140,7 @@ end
! CHECK: %{{.*}} = fir.call @__nv_brevll(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> i64
! CHECK: %{{.*}} = fir.call @__nv_clz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32) -> i32
! CHECK: %{{.*}} = fir.call @__nv_clzll(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> i32
-! CHECK: %{{.*}} = fir.call @__nv_cosf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_cosf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
! CHECK: %{{.*}} = fir.call @__nv_ddiv_rn(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64, f64) -> f64
! CHECK: %{{.*}} = fir.call @__nv_ddiv_rz(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64, f64) -> f64
! CHECK: %{{.*}} = fir.call @__nv_ddiv_ru(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64, f64) -> f64
@@ -159,7 +159,7 @@ end
! CHECK: %{{.*}} = fir.call @__nv_double2uint_rz(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f64) -> i32
! CHECK: %{{.*}} = fir.call @__nv_mul24(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32, i32) -> i32
! CHECK: %{{.*}} = fir.call @__nv_umul24(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i32, i32) -> i32
-! CHECK: %{{.*}} = fir.call @__nv_powf(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32, f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_powf(%{{.*}}, %{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32, f32) -> f32
! CHECK: %{{.*}} = fir.call @__nv_ull2double_rd(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
! CHECK: %{{.*}} = fir.call @__nv_ull2double_rn(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
! CHECK: %{{.*}} = fir.call @__nv_ull2double_ru(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (i64) -> f64
diff --git a/flang/test/Lower/CUDA/cuda-libdevice.cuf b/flang/test/Lower/CUDA/cuda-libdevice.cuf
index f9c5dcc5fc4c3..f57a51e3abd5c 100644
--- a/flang/test/Lower/CUDA/cuda-libdevice.cuf
+++ b/flang/test/Lower/CUDA/cuda-libdevice.cuf
@@ -83,9 +83,17 @@ attributes(global) subroutine test_log()
end subroutine
! CHECK-LABEL: _QPtest_log
-! CHECK: %{{.*}} = fir.call @__nv_logf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
-! CHECK: %{{.*}} = fir.call @__nv_log2f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
-! CHECK: %{{.*}} = fir.call @__nv_log10f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_logf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_log2f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_log10f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+
+attributes(global) subroutine test_sincosf()
+ real :: r, s, c
+ call __sincosf(r, s, c)
+end subroutine
+
+! CHECK-LABEL: _QPtest_sincosf
+! CHECK: fir.call @__nv_fast_sincosf(%{{.*}}, %{{.*}}#0, %{{.*}}#0) proc_attrs<bind_c> fastmath<contract> : (f32, !fir.ref<f32>, !fir.ref<f32>) -> ()
attributes(global) subroutine test_sinf()
real :: res
@@ -94,7 +102,7 @@ attributes(global) subroutine test_sinf()
end subroutine
! CHECK-LABEL: _QPtest_sinf
-! CHECK: %{{.*}} = fir.call @__nv_sinf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_sinf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
attributes(global) subroutine test_tanf()
real :: res
@@ -103,7 +111,7 @@ attributes(global) subroutine test_tanf()
end subroutine
! CHECK-LABEL: _QPtest_tanf
-! CHECK: %{{.*}} = fir.call @__nv_tanf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_tanf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
attributes(global) subroutine test_exp()
real :: res
@@ -113,8 +121,8 @@ attributes(global) subroutine test_exp()
end subroutine
! CHECK-LABEL: _QPtest_exp
-! CHECK: %{{.*}} = fir.call @__nv_expf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
-! CHECK: %{{.*}} = fir.call @__nv_exp10f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_expf(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
+! CHECK: %{{.*}} = fir.call @__nv_fast_exp10f(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (f32) -> f32
attributes(global) subroutine test_double2ll_rX()
integer(8) :: res
More information about the flang-commits
mailing list