[clang] 3304a43 - clang/HIP: Do not call ocml in scalbln implementations (#129639)

Thu Mar 6 16:55:30 PST 2025

Author: Matt Arsenault
Date: 2025-03-07T07:55:26+07:00
New Revision: 3304a430f291e31c6b71ff73a1b44f51456dca56

URL: https://github.com/llvm/llvm-project/commit/3304a430f291e31c6b71ff73a1b44f51456dca56
DIFF: https://github.com/llvm/llvm-project/commit/3304a430f291e31c6b71ff73a1b44f51456dca56.diff

LOG: clang/HIP: Do not call ocml in scalbln implementations (#129639)

I do not understand why this was calling the float version with
an implicit cast from the long. Just clamp to the bounds of int,
and use the generic ldexp (this is also how musl does it, except
scalbnf is the base implementation there).

Somehow INT_MIN was also not defined, so deal with that.

Added: 
    

Modified: 
    clang/lib/Headers/__clang_hip_math.h
    clang/lib/Headers/__clang_hip_runtime_wrapper.h
    clang/test/Headers/__clang_hip_math.hip

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 51d9acbb87270..f6c06eaf4afe0 100644

--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -639,8 +639,11 @@ float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
 
 __DEVICE__
 float scalblnf(float __x, long int __n) {
-  return (__n < INT_MAX) ? __builtin_amdgcn_ldexpf(__x, __n)
-                         : __ocml_scalb_f32(__x, __n);
+  if (__n > INT_MAX)
+    __n = INT_MAX;
+  else if (__n < INT_MIN)
+    __n = INT_MIN;
+  return __builtin_ldexpf(__x, (int)__n);
 }
 
 __DEVICE__
@@ -1044,8 +1047,11 @@ double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
 
 __DEVICE__
 double scalbln(double __x, long int __n) {
-  return (__n < INT_MAX) ? __builtin_amdgcn_ldexp(__x, __n)
-                         : __ocml_scalb_f64(__x, __n);
+  if (__n > INT_MAX)
+    __n = INT_MAX;
+  else if (__n < INT_MIN)
+    __n = INT_MIN;
+  return __builtin_ldexp(__x, (int)__n);
 }
 __DEVICE__
 double scalbn(double __x, int __n) { return __builtin_amdgcn_ldexp(__x, __n); }

diff  --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
index ed1550038e63e..da1e39ac7270e 100644
--- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h
@@ -125,11 +125,13 @@ typedef __SIZE_TYPE__ size_t;
 #pragma push_macro("uint64_t")
 #pragma push_macro("CHAR_BIT")
 #pragma push_macro("INT_MAX")
+#pragma push_macro("INT_MIN")
 #define NULL (void *)0
 #define uint32_t __UINT32_TYPE__
 #define uint64_t __UINT64_TYPE__
 #define CHAR_BIT __CHAR_BIT__
 #define INT_MAX __INTMAX_MAX__
+#define INT_MIN (-__INT_MAX__ - 1)
 #endif // __HIPCC_RTC__
 
 #include <__clang_hip_libdevice_declares.h>
@@ -154,6 +156,7 @@ typedef __SIZE_TYPE__ size_t;
 #pragma pop_macro("uint64_t")
 #pragma pop_macro("CHAR_BIT")
 #pragma pop_macro("INT_MAX")
+#pragma pop_macro("INT_MIN")
 #endif // __HIPCC_RTC__
 #endif // __HIP__
 #endif // __CLANG_HIP_RUNTIME_WRAPPER_H__

diff  --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index ff9f55a8e0710..e879fec0ebe5a 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -4984,63 +4984,31 @@ extern "C" __device__ double test_rsqrt(double x) {
 
 // DEFAULT-LABEL: @test_scalblnf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// DEFAULT-NEXT:    br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// DEFAULT:       cond.true.i:
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// DEFAULT-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
-// DEFAULT-NEXT:    br label [[_ZL8SCALBLNFFL_EXIT:%.*]]
-// DEFAULT:       cond.false.i:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract float @__ocml_scalb_f32(float noundef [[X]], float noundef 0x43E0000000000000) #[[ATTR12]]
-// DEFAULT-NEXT:    br label [[_ZL8SCALBLNFFL_EXIT]]
-// DEFAULT:       _ZL8scalblnffl.exit:
-// DEFAULT-NEXT:    [[COND_I:%.*]] = phi contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// DEFAULT-NEXT:    ret float [[COND_I]]
+// DEFAULT-NEXT:    [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// DEFAULT-NEXT:    [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
+// DEFAULT-NEXT:    ret float [[TMP0]]
 //
 // FINITEONLY-LABEL: @test_scalblnf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// FINITEONLY-NEXT:    br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// FINITEONLY:       cond.true.i:
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// FINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
-// FINITEONLY-NEXT:    br label [[_ZL8SCALBLNFFL_EXIT:%.*]]
-// FINITEONLY:       cond.false.i:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_scalb_f32(float noundef nofpclass(nan inf) [[X]], float noundef nofpclass(nan inf) 0x43E0000000000000) #[[ATTR12]]
-// FINITEONLY-NEXT:    br label [[_ZL8SCALBLNFFL_EXIT]]
-// FINITEONLY:       _ZL8scalblnffl.exit:
-// FINITEONLY-NEXT:    [[COND_I:%.*]] = phi nnan ninf contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// FINITEONLY-NEXT:    ret float [[COND_I]]
+// FINITEONLY-NEXT:    [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// FINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
+// FINITEONLY-NEXT:    ret float [[TMP0]]
 //
 // APPROX-LABEL: @test_scalblnf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// APPROX-NEXT:    br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// APPROX:       cond.true.i:
-// APPROX-NEXT:    [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// APPROX-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
-// APPROX-NEXT:    br label [[_ZL8SCALBLNFFL_EXIT:%.*]]
-// APPROX:       cond.false.i:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract float @__ocml_scalb_f32(float noundef [[X]], float noundef 0x43E0000000000000) #[[ATTR12]]
-// APPROX-NEXT:    br label [[_ZL8SCALBLNFFL_EXIT]]
-// APPROX:       _ZL8scalblnffl.exit:
-// APPROX-NEXT:    [[COND_I:%.*]] = phi contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// APPROX-NEXT:    ret float [[COND_I]]
+// APPROX-NEXT:    [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// APPROX-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// APPROX-NEXT:    [[TMP0:%.*]] = tail call contract noundef float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
+// APPROX-NEXT:    ret float [[TMP0]]
 //
 // AMDGCNSPIRV-LABEL: @test_scalblnf(
 // AMDGCNSPIRV-NEXT:  entry:
-// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// AMDGCNSPIRV:       cond.true.i:
-// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
-// AMDGCNSPIRV-NEXT:    br label [[_ZL8SCALBLNFFL_EXIT:%.*]]
-// AMDGCNSPIRV:       cond.false.i:
-// AMDGCNSPIRV-NEXT:    [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) float @__ocml_scalb_f32(float noundef [[X]], float noundef 0x43E0000000000000) #[[ATTR12]]
-// AMDGCNSPIRV-NEXT:    br label [[_ZL8SCALBLNFFL_EXIT]]
-// AMDGCNSPIRV:       _ZL8scalblnffl.exit:
-// AMDGCNSPIRV-NEXT:    [[COND_I:%.*]] = phi contract float [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// AMDGCNSPIRV-NEXT:    ret float [[COND_I]]
+// AMDGCNSPIRV-NEXT:    [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call contract noundef addrspace(4) float @llvm.ldexp.f32.i32(float [[X:%.*]], i32 [[CONV_I]])
+// AMDGCNSPIRV-NEXT:    ret float [[TMP0]]
 //
 extern "C" __device__ float test_scalblnf(float x, long int y) {
   return scalblnf(x, y);
@@ -5048,63 +5016,31 @@ extern "C" __device__ float test_scalblnf(float x, long int y) {
 
 // DEFAULT-LABEL: @test_scalbln(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// DEFAULT-NEXT:    br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// DEFAULT:       cond.true.i:
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// DEFAULT-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
-// DEFAULT-NEXT:    br label [[_ZL7SCALBLNDL_EXIT:%.*]]
-// DEFAULT:       cond.false.i:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract double @__ocml_scalb_f64(double noundef [[X]], double noundef 0x43E0000000000000) #[[ATTR12]]
-// DEFAULT-NEXT:    br label [[_ZL7SCALBLNDL_EXIT]]
-// DEFAULT:       _ZL7scalblndl.exit:
-// DEFAULT-NEXT:    [[COND_I:%.*]] = phi contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// DEFAULT-NEXT:    ret double [[COND_I]]
+// DEFAULT-NEXT:    [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// DEFAULT-NEXT:    [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
+// DEFAULT-NEXT:    ret double [[TMP0]]
 //
 // FINITEONLY-LABEL: @test_scalbln(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// FINITEONLY-NEXT:    br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// FINITEONLY:       cond.true.i:
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// FINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
-// FINITEONLY-NEXT:    br label [[_ZL7SCALBLNDL_EXIT:%.*]]
-// FINITEONLY:       cond.false.i:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) double @__ocml_scalb_f64(double noundef nofpclass(nan inf) [[X]], double noundef nofpclass(nan inf) 0x43E0000000000000) #[[ATTR12]]
-// FINITEONLY-NEXT:    br label [[_ZL7SCALBLNDL_EXIT]]
-// FINITEONLY:       _ZL7scalblndl.exit:
-// FINITEONLY-NEXT:    [[COND_I:%.*]] = phi nnan ninf contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// FINITEONLY-NEXT:    ret double [[COND_I]]
+// FINITEONLY-NEXT:    [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// FINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
+// FINITEONLY-NEXT:    ret double [[TMP0]]
 //
 // APPROX-LABEL: @test_scalbln(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// APPROX-NEXT:    br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// APPROX:       cond.true.i:
-// APPROX-NEXT:    [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// APPROX-NEXT:    [[TMP0:%.*]] = tail call contract double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
-// APPROX-NEXT:    br label [[_ZL7SCALBLNDL_EXIT:%.*]]
-// APPROX:       cond.false.i:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract double @__ocml_scalb_f64(double noundef [[X]], double noundef 0x43E0000000000000) #[[ATTR12]]
-// APPROX-NEXT:    br label [[_ZL7SCALBLNDL_EXIT]]
-// APPROX:       _ZL7scalblndl.exit:
-// APPROX-NEXT:    [[COND_I:%.*]] = phi contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// APPROX-NEXT:    ret double [[COND_I]]
+// APPROX-NEXT:    [[SPEC_STORE_SELECT_I:%.*]] = tail call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// APPROX-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// APPROX-NEXT:    [[TMP0:%.*]] = tail call contract noundef double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
+// APPROX-NEXT:    ret double [[TMP0]]
 //
 // AMDGCNSPIRV-LABEL: @test_scalbln(
 // AMDGCNSPIRV-NEXT:  entry:
-// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i64 [[Y:%.*]], 9223372036854775807
-// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I]], label [[COND_FALSE_I:%.*]], label [[COND_TRUE_I:%.*]]
-// AMDGCNSPIRV:       cond.true.i:
-// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = trunc i64 [[Y]] to i32
-// AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
-// AMDGCNSPIRV-NEXT:    br label [[_ZL7SCALBLNDL_EXIT:%.*]]
-// AMDGCNSPIRV:       cond.false.i:
-// AMDGCNSPIRV-NEXT:    [[CALL_I:%.*]] = tail call contract spir_func addrspace(4) double @__ocml_scalb_f64(double noundef [[X]], double noundef 0x43E0000000000000) #[[ATTR12]]
-// AMDGCNSPIRV-NEXT:    br label [[_ZL7SCALBLNDL_EXIT]]
-// AMDGCNSPIRV:       _ZL7scalblndl.exit:
-// AMDGCNSPIRV-NEXT:    [[COND_I:%.*]] = phi contract double [ [[TMP0]], [[COND_TRUE_I]] ], [ [[CALL_I]], [[COND_FALSE_I]] ]
-// AMDGCNSPIRV-NEXT:    ret double [[COND_I]]
+// AMDGCNSPIRV-NEXT:    [[SPEC_STORE_SELECT_I:%.*]] = tail call addrspace(4) i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -2147483648)
+// AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SPEC_STORE_SELECT_I]] to i32
+// AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = tail call contract noundef addrspace(4) double @llvm.ldexp.f64.i32(double [[X:%.*]], i32 [[CONV_I]])
+// AMDGCNSPIRV-NEXT:    ret double [[TMP0]]
 //
 extern "C" __device__ double test_scalbln(double x, long int y) {
   return scalbln(x, y);