[libclc] Use CLC conversion builtins in CLC functions (PR #127628)
Fraser Cormack via cfe-commits
cfe-commits at lists.llvm.org
Tue Feb 18 05:14:54 PST 2025
https://github.com/frasercrmck created https://github.com/llvm/llvm-project/pull/127628
This commit is a broad update across libclc to use the CLC conversion builtins in CLC functions, even those with a '__clc' prefix in the generic folder. This better prepares them for an official move to the CLC library in time.
The CLC conversion builtins have an additional benefit in that they support scalars, unlike the __builtin_convertvector builtin which we were using previously. This allows us to simplify some shared definitions.
There is one change to the IR, in the scalar upsample(char, uchar) builtin. It now sign-extends the first argument to i16, where before it zero-extended it. This appears to be correct, and matches the vector behaviour.
From ed8a1251d42d1e022b857fa2f7fab8f8553146b3 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Tue, 11 Feb 2025 15:41:08 +0000
Subject: [PATCH] [libclc] Use CLC conversion builtins in CLC functions
This commit is a broad update across libclc to use the CLC conversion
builtins in CLC functions, even those with a '__clc' prefix in the
generic folder. This better prepares them for an official move to the
CLC library in time.
The CLC conversion builtins have an additional benefit in that they
support scalars, unlike the __builtin_convertvector builtin which we
were using previously. This allows us to simplify some shared
definitions.
There is one change to the IR, in the scalar upsample(char, uchar)
builtin. It now sign-extends the first argument to i16, where before it
zero-extended it. This appears to be correct, and matches the vector
behaviour.
---
libclc/clc/include/clc/clc_convert.h | 2 +
libclc/clc/lib/generic/integer/clc_mad_sat.cl | 43 ++++---------
libclc/clc/lib/generic/integer/clc_mul_hi.cl | 33 ++++------
.../clc/lib/generic/integer/clc_upsample.cl | 40 ++++++-------
libclc/generic/lib/math/clc_exp10.cl | 17 +++---
libclc/generic/lib/math/clc_fmod.cl | 41 ++++++-------
libclc/generic/lib/math/clc_pow.cl | 54 +++++++++--------
libclc/generic/lib/math/clc_pown.cl | 58 +++++++++---------
libclc/generic/lib/math/clc_powr.cl | 57 +++++++++---------
libclc/generic/lib/math/clc_remainder.cl | 41 ++++++-------
libclc/generic/lib/math/clc_remquo.cl | 41 ++++++-------
libclc/generic/lib/math/clc_rootn.cl | 60 ++++++++++---------
12 files changed, 236 insertions(+), 251 deletions(-)
diff --git a/libclc/clc/include/clc/clc_convert.h b/libclc/clc/include/clc/clc_convert.h
index 20bbd57540b30..12cd988d59c54 100644
--- a/libclc/clc/include/clc/clc_convert.h
+++ b/libclc/clc/include/clc/clc_convert.h
@@ -1,6 +1,8 @@
#ifndef __CLC_CLC_CONVERT_H__
#define __CLC_CLC_CONVERT_H__
+#include <clc/clcmacro.h>
+
#define _CLC_CONVERT_DECL(FROM_TYPE, TO_TYPE, SUFFIX) \
_CLC_OVERLOAD _CLC_DECL TO_TYPE __clc_convert_##TO_TYPE##SUFFIX(FROM_TYPE x);
diff --git a/libclc/clc/lib/generic/integer/clc_mad_sat.cl b/libclc/clc/lib/generic/integer/clc_mad_sat.cl
index 4e559dba2b2f5..530e9c84b10a0 100644
--- a/libclc/clc/lib/generic/integer/clc_mad_sat.cl
+++ b/libclc/clc/lib/generic/integer/clc_mad_sat.cl
@@ -1,3 +1,4 @@
+#include <clc/clc_convert.h>
#include <clc/clcmacro.h>
#include <clc/integer/clc_add_sat.h>
#include <clc/integer/clc_mad24.h>
@@ -8,34 +9,23 @@
#include <clc/relational/clc_select.h>
#include <clc/shared/clc_clamp.h>
-#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY)
-
-// Macro for defining mad_sat variants for char/uchar/short/ushort
-// FIXME: Once using __clc_convert_ty, can easily unify scalar and vector defs
#define __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE, UP_TYPE, LIT_PREFIX) \
_CLC_OVERLOAD _CLC_DEF TYPE __clc_mad_sat(TYPE x, TYPE y, TYPE z) { \
- return __clc_clamp( \
- (UP_TYPE)__clc_mad24((UP_TYPE)x, (UP_TYPE)y, (UP_TYPE)z), \
- (UP_TYPE)LIT_PREFIX##_MIN, (UP_TYPE)LIT_PREFIX##_MAX); \
- }
-
-#define __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE, UP_TYPE, LIT_PREFIX) \
- _CLC_OVERLOAD _CLC_DEF TYPE __clc_mad_sat(TYPE x, TYPE y, TYPE z) { \
- UP_TYPE upscaled_mad = __clc_mad24(__CLC_CONVERT_TY(x, UP_TYPE), \
- __CLC_CONVERT_TY(y, UP_TYPE), \
- __CLC_CONVERT_TY(z, UP_TYPE)); \
+ UP_TYPE upscaled_mad = \
+ __clc_mad24(__clc_convert_##UP_TYPE(x), __clc_convert_##UP_TYPE(y), \
+ __clc_convert_##UP_TYPE(z)); \
UP_TYPE clamped_mad = __clc_clamp(upscaled_mad, (UP_TYPE)LIT_PREFIX##_MIN, \
(UP_TYPE)LIT_PREFIX##_MAX); \
- return __CLC_CONVERT_TY(clamped_mad, TYPE); \
+ return __clc_convert_##TYPE(clamped_mad); \
}
#define __CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(TYPE, UP_TYPE, LIT_PREFIX) \
__CLC_DEFINE_SIMPLE_MAD_SAT(TYPE, UP_TYPE, LIT_PREFIX) \
- __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##2, UP_TYPE##2, LIT_PREFIX) \
- __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##3, UP_TYPE##3, LIT_PREFIX) \
- __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##4, UP_TYPE##4, LIT_PREFIX) \
- __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##8, UP_TYPE##8, LIT_PREFIX) \
- __CLC_DEFINE_SIMPLE_MAD_SAT_VEC(TYPE##16, UP_TYPE##16, LIT_PREFIX)
+ __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##2, UP_TYPE##2, LIT_PREFIX) \
+ __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##3, UP_TYPE##3, LIT_PREFIX) \
+ __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##4, UP_TYPE##4, LIT_PREFIX) \
+ __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##8, UP_TYPE##8, LIT_PREFIX) \
+ __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##16, UP_TYPE##16, LIT_PREFIX)
__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(char, int, CHAR)
__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(uchar, uint, UCHAR)
@@ -67,20 +57,13 @@ __CLC_DEFINE_UINTLONG_MAD_SAT_ALL_TYS(ulong, long, ULONG)
INTTY mhi = __clc_mul_hi(x, y); \
UINTTY mlo = __clc_as_##UINTTY(x * y); \
SLONGTY m = __clc_upsample(mhi, mlo); \
- m += __CLC_CONVERT_TY(z, SLONGTY); \
+ m += __clc_convert_##SLONGTY(z); \
m = __clc_clamp(m, (SLONGTY)INT_MIN, (SLONGTY)INT_MAX); \
- return __CLC_CONVERT_TY(m, INTTY); \
+ return __clc_convert_##INTTY(m); \
}
-// FIXME: Once using __clc_convert_ty, can easily unify scalar and vector defs
#define __CLC_DEFINE_SINT_MAD_SAT_ALL_TYS(INTTY, UINTTY, SLONGTY) \
- _CLC_OVERLOAD _CLC_DEF INTTY __clc_mad_sat(INTTY x, INTTY y, INTTY z) { \
- INTTY mhi = __clc_mul_hi(x, y); \
- UINTTY mlo = __clc_as_##UINTTY(x * y); \
- SLONGTY m = __clc_upsample(mhi, mlo); \
- m += z; \
- return __clc_clamp(m, (SLONGTY)INT_MIN, (SLONGTY)INT_MAX); \
- } \
+ __CLC_DEFINE_SINT_MAD_SAT(INTTY, UINTTY, SLONGTY) \
__CLC_DEFINE_SINT_MAD_SAT(INTTY##2, UINTTY##2, SLONGTY##2) \
__CLC_DEFINE_SINT_MAD_SAT(INTTY##3, UINTTY##3, SLONGTY##3) \
__CLC_DEFINE_SINT_MAD_SAT(INTTY##4, UINTTY##4, SLONGTY##4) \
diff --git a/libclc/clc/lib/generic/integer/clc_mul_hi.cl b/libclc/clc/lib/generic/integer/clc_mul_hi.cl
index cf4acc5429cb4..28457ac6126dd 100644
--- a/libclc/clc/lib/generic/integer/clc_mul_hi.cl
+++ b/libclc/clc/lib/generic/integer/clc_mul_hi.cl
@@ -1,31 +1,24 @@
+#include <clc/clc_convert.h>
#include <clc/integer/clc_hadd.h>
#include <clc/integer/definitions.h>
#include <clc/internal/clc.h>
-// TODO: Replace with __clc_convert_<type> when available
-#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY)
-
-#define __CLC_MUL_HI_VEC_IMPL(BGENTYPE, GENTYPE, GENSIZE) \
- _CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) { \
- BGENTYPE large_x = __CLC_CONVERT_TY(x, BGENTYPE); \
- BGENTYPE large_y = __CLC_CONVERT_TY(y, BGENTYPE); \
- BGENTYPE large_mul_hi = (large_x * large_y) >> (BGENTYPE)GENSIZE; \
- return __CLC_CONVERT_TY(large_mul_hi, GENTYPE); \
- }
-
// For all types EXCEPT long, which is implemented separately
#define __CLC_MUL_HI_IMPL(BGENTYPE, GENTYPE, GENSIZE) \
_CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) { \
- return (GENTYPE)(((BGENTYPE)x * (BGENTYPE)y) >> GENSIZE); \
+ BGENTYPE large_x = __clc_convert_##BGENTYPE(x); \
+ BGENTYPE large_y = __clc_convert_##BGENTYPE(y); \
+ BGENTYPE large_mul_hi = (large_x * large_y) >> (BGENTYPE)GENSIZE; \
+ return __clc_convert_##GENTYPE(large_mul_hi); \
}
#define __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS) \
__CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS) \
- __CLC_MUL_HI_VEC_IMPL(BTYPE##2, TYPE##2, BITS) \
- __CLC_MUL_HI_VEC_IMPL(BTYPE##3, TYPE##3, BITS) \
- __CLC_MUL_HI_VEC_IMPL(BTYPE##4, TYPE##4, BITS) \
- __CLC_MUL_HI_VEC_IMPL(BTYPE##8, TYPE##8, BITS) \
- __CLC_MUL_HI_VEC_IMPL(BTYPE##16, TYPE##16, BITS)
+ __CLC_MUL_HI_IMPL(BTYPE##2, TYPE##2, BITS) \
+ __CLC_MUL_HI_IMPL(BTYPE##3, TYPE##3, BITS) \
+ __CLC_MUL_HI_IMPL(BTYPE##4, TYPE##4, BITS) \
+ __CLC_MUL_HI_IMPL(BTYPE##8, TYPE##8, BITS) \
+ __CLC_MUL_HI_IMPL(BTYPE##16, TYPE##16, BITS)
_CLC_OVERLOAD _CLC_DEF long __clc_mul_hi(long x, long y) {
long f, o, i;
@@ -98,8 +91,8 @@ _CLC_OVERLOAD _CLC_DEF ulong __clc_mul_hi(ulong x, ulong y) {
f = x_hi * y_hi; \
o = x_hi * y_lo; \
i = x_lo * y_hi; \
- l = __CLC_CONVERT_TY(x_lo * y_lo, UTY); \
- i += __CLC_CONVERT_TY(l >> (UTY)32, TY); \
+ l = __clc_convert_##UTY(x_lo * y_lo); \
+ i += __clc_convert_##TY(l >> (UTY)32); \
\
return f + (__clc_hadd(o, i) >> (TY)31); \
}
@@ -128,5 +121,3 @@ __CLC_MUL_HI_TYPES()
#undef __CLC_MUL_HI_LONG_VEC_IMPL
#undef __CLC_MUL_HI_DEC_IMPL
#undef __CLC_MUL_HI_IMPL
-#undef __CLC_MUL_HI_VEC_IMPL
-#undef __CLC_CONVERT_TY
diff --git a/libclc/clc/lib/generic/integer/clc_upsample.cl b/libclc/clc/lib/generic/integer/clc_upsample.cl
index d53ef7240bfc2..a13a6ed88ddcd 100644
--- a/libclc/clc/lib/generic/integer/clc_upsample.cl
+++ b/libclc/clc/lib/generic/integer/clc_upsample.cl
@@ -1,35 +1,31 @@
+#include <clc/clc_convert.h>
#include <clc/internal/clc.h>
-// TODO: Replace with __clc_convert_<type> when available
-#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY)
-
-#define __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \
+#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \
_CLC_OVERLOAD _CLC_DEF BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo) { \
- BGENTYPE large_hi = __CLC_CONVERT_TY(hi, BGENTYPE); \
- BGENTYPE large_lo = __CLC_CONVERT_TY(lo, BGENTYPE); \
+ BGENTYPE large_hi = __clc_convert_##BGENTYPE(hi); \
+ BGENTYPE large_lo = __clc_convert_##BGENTYPE(lo); \
return (large_hi << (BGENTYPE)GENSIZE) | large_lo; \
}
-#define __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \
- _CLC_OVERLOAD _CLC_DEF BGENTYPE __clc_upsample(GENTYPE hi, UGENTYPE lo) { \
- return ((BGENTYPE)hi << GENSIZE) | lo; \
- } \
- __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2, GENSIZE) \
- __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3, GENSIZE) \
- __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4, GENSIZE) \
- __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8, GENSIZE) \
- __CLC_UPSAMPLE_VEC_IMPL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16, GENSIZE)
+#define __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(BGENTYPE, GENTYPE, UGENTYPE, \
+ GENSIZE) \
+ __CLC_UPSAMPLE_IMPL(BGENTYPE, GENTYPE, UGENTYPE, GENSIZE) \
+ __CLC_UPSAMPLE_IMPL(BGENTYPE##2, GENTYPE##2, UGENTYPE##2, GENSIZE) \
+ __CLC_UPSAMPLE_IMPL(BGENTYPE##3, GENTYPE##3, UGENTYPE##3, GENSIZE) \
+ __CLC_UPSAMPLE_IMPL(BGENTYPE##4, GENTYPE##4, UGENTYPE##4, GENSIZE) \
+ __CLC_UPSAMPLE_IMPL(BGENTYPE##8, GENTYPE##8, UGENTYPE##8, GENSIZE) \
+ __CLC_UPSAMPLE_IMPL(BGENTYPE##16, GENTYPE##16, UGENTYPE##16, GENSIZE)
#define __CLC_UPSAMPLE_TYPES() \
- __CLC_UPSAMPLE_IMPL(short, char, uchar, 8) \
- __CLC_UPSAMPLE_IMPL(ushort, uchar, uchar, 8) \
- __CLC_UPSAMPLE_IMPL(int, short, ushort, 16) \
- __CLC_UPSAMPLE_IMPL(uint, ushort, ushort, 16) \
- __CLC_UPSAMPLE_IMPL(long, int, uint, 32) \
- __CLC_UPSAMPLE_IMPL(ulong, uint, uint, 32)
+ __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(short, char, uchar, 8) \
+ __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(ushort, uchar, uchar, 8) \
+ __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(int, short, ushort, 16) \
+ __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(uint, ushort, ushort, 16) \
+ __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(long, int, uint, 32) \
+ __CLC_UPSAMPLE_IMPL_ALL_VEC_SIZES(ulong, uint, uint, 32)
__CLC_UPSAMPLE_TYPES()
#undef __CLC_UPSAMPLE_TYPES
#undef __CLC_UPSAMPLE_IMPL
-#undef __CLC_CONVERT_TY
diff --git a/libclc/generic/lib/math/clc_exp10.cl b/libclc/generic/lib/math/clc_exp10.cl
index 0eb53d013a85a..4f839a9815ac0 100644
--- a/libclc/generic/lib/math/clc_exp10.cl
+++ b/libclc/generic/lib/math/clc_exp10.cl
@@ -21,6 +21,7 @@
*/
#include <clc/clc.h>
+#include <clc/clc_convert.h>
#include <clc/clcmacro.h>
#include <clc/math/clc_mad.h>
#include <clc/math/clc_subnormal_config.h>
@@ -70,7 +71,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_exp10(float x) {
int return_inf = x > X_MAX;
int return_zero = x < X_MIN;
- int n = convert_int(x * R_64_BY_LOG10_2);
+ int n = __clc_convert_int(x * R_64_BY_LOG10_2);
float fn = (float)n;
int j = n & 0x3f;
@@ -89,11 +90,11 @@ _CLC_DEF _CLC_OVERLOAD float __clc_exp10(float x) {
float two_to_jby64 = USE_TABLE(exp_tbl, j);
z2 = __clc_mad(two_to_jby64, z2, two_to_jby64);
- float z2s = z2 * as_float(0x1 << (m + 149));
- float z2n = as_float(as_int(z2) + m2);
+ float z2s = z2 * __clc_as_float(0x1 << (m + 149));
+ float z2n = __clc_as_float(__clc_as_int(z2) + m2);
z2 = m <= -126 ? z2s : z2n;
- z2 = return_inf ? as_float(PINFBITPATT_SP32) : z2;
+ z2 = return_inf ? __clc_as_float(PINFBITPATT_SP32) : z2;
z2 = return_zero ? 0.0f : z2;
z2 = return_nan ? x : z2;
return z2;
@@ -115,7 +116,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_exp10(double x) {
// ln(10)
const double R_LN10 = 0x1.26bb1bbb55516p+1;
- int n = convert_int(x * R_64_BY_LOG10_2);
+ int n = __clc_convert_int(x * R_64_BY_LOG10_2);
double dn = (double)n;
@@ -144,15 +145,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_exp10(double x) {
int n1 = m >> 2;
int n2 = m - n1;
- double z3 = z2 * as_double(((long)n1 + 1023) << 52);
- z3 *= as_double(((long)n2 + 1023) << 52);
+ double z3 = z2 * __clc_as_double(((long)n1 + 1023) << 52);
+ z3 *= __clc_as_double(((long)n2 + 1023) << 52);
z2 = ldexp(z2, m);
z2 = small_value ? z3 : z2;
z2 = __clc_isnan(x) ? x : z2;
- z2 = x > X_MAX ? as_double(PINFBITPATT_DP64) : z2;
+ z2 = x > X_MAX ? __clc_as_double(PINFBITPATT_DP64) : z2;
z2 = x < X_MIN ? 0.0 : z2;
return z2;
diff --git a/libclc/generic/lib/math/clc_fmod.cl b/libclc/generic/lib/math/clc_fmod.cl
index a4a2ab791df68..31a5d4dc05c03 100644
--- a/libclc/generic/lib/math/clc_fmod.cl
+++ b/libclc/generic/lib/math/clc_fmod.cl
@@ -21,6 +21,7 @@
*/
#include <clc/clc.h>
+#include <clc/clc_convert.h>
#include <clc/clcmacro.h>
#include <clc/integer/clc_clz.h>
#include <clc/math/clc_floor.h>
@@ -31,19 +32,19 @@
#include <math/clc_remainder.h>
_CLC_DEF _CLC_OVERLOAD float __clc_fmod(float x, float y) {
- int ux = as_int(x);
+ int ux = __clc_as_int(x);
int ax = ux & EXSIGNBIT_SP32;
- float xa = as_float(ax);
+ float xa = __clc_as_float(ax);
int sx = ux ^ ax;
int ex = ax >> EXPSHIFTBITS_SP32;
- int uy = as_int(y);
+ int uy = __clc_as_int(y);
int ay = uy & EXSIGNBIT_SP32;
- float ya = as_float(ay);
+ float ya = __clc_as_float(ay);
int ey = ay >> EXPSHIFTBITS_SP32;
- float xr = as_float(0x3f800000 | (ax & 0x007fffff));
- float yr = as_float(0x3f800000 | (ay & 0x007fffff));
+ float xr = __clc_as_float(0x3f800000 | (ax & 0x007fffff));
+ float yr = __clc_as_float(0x3f800000 | (ay & 0x007fffff));
int c;
int k = ex - ey;
@@ -62,17 +63,17 @@ _CLC_DEF _CLC_OVERLOAD float __clc_fmod(float x, float y) {
xr = lt ? xa : xr;
yr = lt ? ya : yr;
- float s = as_float(ey << EXPSHIFTBITS_SP32);
+ float s = __clc_as_float(ey << EXPSHIFTBITS_SP32);
xr *= lt ? 1.0f : s;
c = ax == ay;
xr = c ? 0.0f : xr;
- xr = as_float(sx ^ as_int(xr));
+ xr = __clc_as_float(sx ^ __clc_as_int(xr));
c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 |
ay == 0;
- xr = c ? as_float(QNANBITPATT_SP32) : xr;
+ xr = c ? __clc_as_float(QNANBITPATT_SP32) : xr;
return xr;
}
@@ -80,18 +81,18 @@ _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_fmod, float, float);
#ifdef cl_khr_fp64
_CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {
- ulong ux = as_ulong(x);
+ ulong ux = __clc_as_ulong(x);
ulong ax = ux & ~SIGNBIT_DP64;
ulong xsgn = ux ^ ax;
- double dx = as_double(ax);
- int xexp = convert_int(ax >> EXPSHIFTBITS_DP64);
+ double dx = __clc_as_double(ax);
+ int xexp = __clc_convert_int(ax >> EXPSHIFTBITS_DP64);
int xexp1 = 11 - (int)__clc_clz(ax & MANTBITS_DP64);
xexp1 = xexp < 1 ? xexp1 : xexp;
- ulong uy = as_ulong(y);
+ ulong uy = __clc_as_ulong(y);
ulong ay = uy & ~SIGNBIT_DP64;
- double dy = as_double(ay);
- int yexp = convert_int(ay >> EXPSHIFTBITS_DP64);
+ double dy = __clc_as_double(ay);
+ int yexp = __clc_convert_int(ay >> EXPSHIFTBITS_DP64);
int yexp1 = 11 - (int)__clc_clz(ay & MANTBITS_DP64);
yexp1 = yexp < 1 ? yexp1 : yexp;
@@ -151,12 +152,12 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {
dx += i ? w : 0.0;
// At this point, dx lies in the range [0,dy)
- double ret = as_double(xsgn ^ as_ulong(dx));
- dx = as_double(ax);
+ double ret = __clc_as_double(xsgn ^ __clc_as_ulong(dx));
+ dx = __clc_as_double(ax);
// Now handle |x| == |y|
int c = dx == dy;
- t = as_double(xsgn);
+ t = __clc_as_double(xsgn);
ret = c ? t : ret;
// Next, handle |x| < |y|
@@ -167,7 +168,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {
// |y| is 0
c = dy == 0.0;
- ret = c ? as_double(QNANBITPATT_DP64) : ret;
+ ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret;
// y is +-Inf, NaN
c = yexp > BIASEDEMAX_DP64;
@@ -176,7 +177,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {
// x is +=Inf, NaN
c = xexp > BIASEDEMAX_DP64;
- ret = c ? as_double(QNANBITPATT_DP64) : ret;
+ ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret;
return ret;
}
diff --git a/libclc/generic/lib/math/clc_pow.cl b/libclc/generic/lib/math/clc_pow.cl
index 5dcd392c0f7ed..fce9573c39bac 100644
--- a/libclc/generic/lib/math/clc_pow.cl
+++ b/libclc/generic/lib/math/clc_pow.cl
@@ -21,6 +21,7 @@
*/
#include <clc/clc.h>
+#include <clc/clc_convert.h>
#include <clc/clcmacro.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_mad.h>
@@ -68,18 +69,18 @@
_CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) {
- int ix = as_int(x);
+ int ix = __clc_as_int(x);
int ax = ix & EXSIGNBIT_SP32;
int xpos = ix == ax;
- int iy = as_int(y);
+ int iy = __clc_as_int(y);
int ay = iy & EXSIGNBIT_SP32;
int ypos = iy == ay;
/* Extra precise log calculation
* First handle case that x is close to 1
*/
- float r = 1.0f - as_float(ax);
+ float r = 1.0f - __clc_as_float(ax);
int near1 = __clc_fabs(r) < 0x1.0p-4f;
float r2 = r * r;
@@ -103,7 +104,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) {
/* Computations for x not near 1 */
int m = (int)(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
float mf = (float)m;
- int ixs = as_int(as_float(ax | 0x3f800000) - 1.0f);
+ int ixs = __clc_as_int(__clc_as_float(ax | 0x3f800000) - 1.0f);
float mfs = (float)((ixs >> EXPSHIFTBITS_SP32) - 253);
int c = m == -127;
int ixn = c ? ixs : ax;
@@ -112,8 +113,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) {
int indx = (ixn & 0x007f0000) + ((ixn & 0x00008000) << 1);
/* F - Y */
- float f = as_float(0x3f000000 | indx) -
- as_float(0x3f000000 | (ixn & MANTBITS_SP32));
+ float f = __clc_as_float(0x3f000000 | indx) -
+ __clc_as_float(0x3f000000 | (ixn & MANTBITS_SP32));
indx = indx >> 16;
float2 tv = USE_TABLE(log_inv_tbl_ep, indx);
@@ -141,10 +142,10 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) {
lh = near1 ? lh_near1 : lh;
l = near1 ? l_near1 : l;
- float gh = as_float(as_int(l) & 0xfffff000);
+ float gh = __clc_as_float(__clc_as_int(l) & 0xfffff000);
float gt = ((ltt - (lt - lth)) + ((lh - l) + lt)) + (l - gh);
- float yh = as_float(iy & 0xfffff000);
+ float yh = __clc_as_float(iy & 0xfffff000);
float yt = y - yh;
@@ -155,7 +156,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) {
/* Extra precise exp of ylogx */
/* 64/log2 : 92.332482616893657 */
const float R_64_BY_LOG2 = 0x1.715476p+6f;
- int n = convert_int(ylogx * R_64_BY_LOG2);
+ int n = __clc_convert_int(ylogx * R_64_BY_LOG2);
float nf = (float)n;
int j = n & 0x3f;
@@ -178,14 +179,14 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) {
float expylogx =
__clc_mad(tv.s0, poly, __clc_mad(tv.s1, poly, tv.s1)) + tv.s0;
- float sexpylogx = expylogx * as_float(0x1 << (m + 149));
- float texpylogx = as_float(as_int(expylogx) + m2);
+ float sexpylogx = expylogx * __clc_as_float(0x1 << (m + 149));
+ float texpylogx = __clc_as_float(__clc_as_int(expylogx) + m2);
expylogx = m < -125 ? sexpylogx : texpylogx;
/* Result is +-Inf if (ylogx + ylogx_t) > 128*log2 */
expylogx = (ylogx > 0x1.62e430p+6f) |
(ylogx == 0x1.62e430p+6f & ylogx_t > -0x1.05c610p-22f)
- ? as_float(PINFBITPATT_SP32)
+ ? __clc_as_float(PINFBITPATT_SP32)
: expylogx;
/* Result is 0 if ylogx < -149*log2 */
@@ -205,9 +206,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) {
inty = yexp < 1 ? 0 : inty;
inty = yexp > 24 ? 2 : inty;
- float signval = as_float((as_uint(expylogx) ^ SIGNBIT_SP32));
+ float signval = __clc_as_float((__clc_as_uint(expylogx) ^ SIGNBIT_SP32));
expylogx = ((inty == 1) & !xpos) ? signval : expylogx;
- int ret = as_int(expylogx);
+ int ret = __clc_as_int(expylogx);
/* Corner case handling */
ret = (!xpos & (inty == 0)) ? QNANBITPATT_SP32 : ret;
@@ -236,7 +237,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y) {
ret = ay == 0 ? 0x3f800000 : ret;
ret = ix == 0x3f800000 ? 0x3f800000 : ret;
- return as_float(ret);
+ return __clc_as_float(ret);
}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_pow, float, float)
@@ -245,11 +246,11 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
const double real_log2_tail = 5.76999904754328540596e-08;
const double real_log2_lead = 6.93147122859954833984e-01;
- long ux = as_long(x);
+ long ux = __clc_as_long(x);
long ax = ux & (~SIGNBIT_DP64);
int xpos = ax == ux;
- long uy = as_long(y);
+ long uy = __clc_as_long(y);
long ay = uy & (~SIGNBIT_DP64);
int ypos = ay == uy;
@@ -261,7 +262,8 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
double xexp = (double)exp;
long mantissa = ax & 0x000FFFFFFFFFFFFFL;
- long temp_ux = as_long(as_double(0x3ff0000000000000L | mantissa) - 1.0);
+ long temp_ux =
+ __clc_as_long(__clc_as_double(0x3ff0000000000000L | mantissa) - 1.0);
exp = ((temp_ux & 0x7FF0000000000000L) >> 52) - 2045;
double xexp1 = (double)exp;
long mantissa1 = temp_ux & 0x000FFFFFFFFFFFFFL;
@@ -273,14 +275,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
((mantissa & 0x0000080000000000) << 1);
int index = rax >> 44;
- double F = as_double(rax | 0x3FE0000000000000L);
- double Y = as_double(mantissa | 0x3FE0000000000000L);
+ double F = __clc_as_double(rax | 0x3FE0000000000000L);
+ double Y = __clc_as_double(mantissa | 0x3FE0000000000000L);
double f = F - Y;
double2 tv = USE_TABLE(log_f_inv_tbl, index);
double log_h = tv.s0;
double log_t = tv.s1;
double f_inv = (log_h + log_t) * f;
- double r1 = as_double(as_long(f_inv) & 0xfffffffff8000000L);
+ double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L);
double r2 = fma(-F, r1, f) * (log_h + log_t);
double r = r1 + r2;
@@ -304,11 +306,11 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
double resT_h = poly0h;
double H = resT + resH;
- double H_h = as_double(as_long(H) & 0xfffffffff8000000L);
+ double H_h = __clc_as_double(__clc_as_long(H) & 0xfffffffff8000000L);
double T = (resH - H + resT) + (resT_t - (resT + resT_h)) + (H - H_h);
H = H_h;
- double y_head = as_double(uy & 0xfffffffff8000000L);
+ double y_head = __clc_as_double(uy & 0xfffffffff8000000L);
double y_tail = y - y_head;
double temp = fma(y_tail, H, fma(y_head, T, y_tail * T));
@@ -354,7 +356,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
expv = fma(f, q, f2) + f1;
expv = ldexp(expv, m);
- expv = v > max_exp_arg ? as_double(0x7FF0000000000000L) : expv;
+ expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv;
expv = v < min_exp_arg ? 0.0 : expv;
}
@@ -376,7 +378,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
expv *= (inty == 1) & !xpos ? -1.0 : 1.0;
- long ret = as_long(expv);
+ long ret = __clc_as_long(expv);
// Now all the edge cases
ret = !xpos & (inty == 0) ? QNANBITPATT_DP64 : ret;
@@ -410,7 +412,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
ret = ay == 0L ? 0x3ff0000000000000L : ret;
ret = ux == 0x3ff0000000000000L ? 0x3ff0000000000000L : ret;
- return as_double(ret);
+ return __clc_as_double(ret);
}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_pow, double, double)
#endif
diff --git a/libclc/generic/lib/math/clc_pown.cl b/libclc/generic/lib/math/clc_pown.cl
index a0f968c238e99..a613b2998c3f6 100644
--- a/libclc/generic/lib/math/clc_pown.cl
+++ b/libclc/generic/lib/math/clc_pown.cl
@@ -21,6 +21,7 @@
*/
#include <clc/clc.h>
+#include <clc/clc_convert.h>
#include <clc/clcmacro.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_mad.h>
@@ -67,17 +68,17 @@
_CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) {
float y = (float)ny;
- int ix = as_int(x);
+ int ix = __clc_as_int(x);
int ax = ix & EXSIGNBIT_SP32;
int xpos = ix == ax;
- int iy = as_int(y);
+ int iy = __clc_as_int(y);
int ay = iy & EXSIGNBIT_SP32;
int ypos = iy == ay;
// Extra precise log calculation
// First handle case that x is close to 1
- float r = 1.0f - as_float(ax);
+ float r = 1.0f - __clc_as_float(ax);
int near1 = __clc_fabs(r) < 0x1.0p-4f;
float r2 = r * r;
@@ -101,7 +102,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) {
// Computations for x not near 1
int m = (int)(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
float mf = (float)m;
- int ixs = as_int(as_float(ax | 0x3f800000) - 1.0f);
+ int ixs = __clc_as_int(__clc_as_float(ax | 0x3f800000) - 1.0f);
float mfs = (float)((ixs >> EXPSHIFTBITS_SP32) - 253);
int c = m == -127;
int ixn = c ? ixs : ax;
@@ -110,8 +111,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) {
int indx = (ixn & 0x007f0000) + ((ixn & 0x00008000) << 1);
// F - Y
- float f = as_float(0x3f000000 | indx) -
- as_float(0x3f000000 | (ixn & MANTBITS_SP32));
+ float f = __clc_as_float(0x3f000000 | indx) -
+ __clc_as_float(0x3f000000 | (ixn & MANTBITS_SP32));
indx = indx >> 16;
float2 tv = USE_TABLE(log_inv_tbl_ep, indx);
@@ -139,10 +140,10 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) {
lh = near1 ? lh_near1 : lh;
l = near1 ? l_near1 : l;
- float gh = as_float(as_int(l) & 0xfffff000);
+ float gh = __clc_as_float(__clc_as_int(l) & 0xfffff000);
float gt = ((ltt - (lt - lth)) + ((lh - l) + lt)) + (l - gh);
- float yh = as_float(iy & 0xfffff000);
+ float yh = __clc_as_float(iy & 0xfffff000);
float yt = (float)(ny - (int)yh);
@@ -153,7 +154,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) {
// Extra precise exp of ylogx
// 64/log2 : 92.332482616893657
const float R_64_BY_LOG2 = 0x1.715476p+6f;
- int n = convert_int(ylogx * R_64_BY_LOG2);
+ int n = __clc_convert_int(ylogx * R_64_BY_LOG2);
float nf = (float)n;
int j = n & 0x3f;
@@ -176,14 +177,14 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) {
float expylogx =
__clc_mad(tv.s0, poly, __clc_mad(tv.s1, poly, tv.s1)) + tv.s0;
- float sexpylogx = expylogx * as_float(0x1 << (m + 149));
- float texpylogx = as_float(as_int(expylogx) + m2);
+ float sexpylogx = expylogx * __clc_as_float(0x1 << (m + 149));
+ float texpylogx = __clc_as_float(__clc_as_int(expylogx) + m2);
expylogx = m < -125 ? sexpylogx : texpylogx;
// Result is +-Inf if (ylogx + ylogx_t) > 128*log2
expylogx = ((ylogx > 0x1.62e430p+6f) |
(ylogx == 0x1.62e430p+6f & ylogx_t > -0x1.05c610p-22f))
- ? as_float(PINFBITPATT_SP32)
+ ? __clc_as_float(PINFBITPATT_SP32)
: expylogx;
// Result is 0 if ylogx < -149*log2
@@ -196,9 +197,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) {
int inty = 2 - (ny & 1);
- float signval = as_float((as_uint(expylogx) ^ SIGNBIT_SP32));
+ float signval = __clc_as_float((__clc_as_uint(expylogx) ^ SIGNBIT_SP32));
expylogx = ((inty == 1) & !xpos) ? signval : expylogx;
- int ret = as_int(expylogx);
+ int ret = __clc_as_int(expylogx);
// Corner case handling
int xinf = xpos ? PINFBITPATT_SP32 : NINFBITPATT_SP32;
@@ -218,7 +219,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny) {
ret = ax > PINFBITPATT_SP32 ? ix : ret;
ret = ny == 0 ? 0x3f800000 : ret;
- return as_float(ret);
+ return __clc_as_float(ret);
}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_pown, float, int)
@@ -229,11 +230,11 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
double y = (double)ny;
- long ux = as_long(x);
+ long ux = __clc_as_long(x);
long ax = ux & (~SIGNBIT_DP64);
int xpos = ax == ux;
- long uy = as_long(y);
+ long uy = __clc_as_long(y);
long ay = uy & (~SIGNBIT_DP64);
int ypos = ay == uy;
@@ -245,7 +246,8 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
double xexp = (double)exp;
long mantissa = ax & 0x000FFFFFFFFFFFFFL;
- long temp_ux = as_long(as_double(0x3ff0000000000000L | mantissa) - 1.0);
+ long temp_ux =
+ __clc_as_long(__clc_as_double(0x3ff0000000000000L | mantissa) - 1.0);
exp = ((temp_ux & 0x7FF0000000000000L) >> 52) - 2045;
double xexp1 = (double)exp;
long mantissa1 = temp_ux & 0x000FFFFFFFFFFFFFL;
@@ -257,14 +259,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
((mantissa & 0x0000080000000000) << 1);
int index = rax >> 44;
- double F = as_double(rax | 0x3FE0000000000000L);
- double Y = as_double(mantissa | 0x3FE0000000000000L);
+ double F = __clc_as_double(rax | 0x3FE0000000000000L);
+ double Y = __clc_as_double(mantissa | 0x3FE0000000000000L);
double f = F - Y;
double2 tv = USE_TABLE(log_f_inv_tbl, index);
double log_h = tv.s0;
double log_t = tv.s1;
double f_inv = (log_h + log_t) * f;
- double r1 = as_double(as_long(f_inv) & 0xfffffffff8000000L);
+ double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L);
double r2 = fma(-F, r1, f) * (log_h + log_t);
double r = r1 + r2;
@@ -288,15 +290,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
double resT_h = poly0h;
double H = resT + resH;
- double H_h = as_double(as_long(H) & 0xfffffffff8000000L);
+ double H_h = __clc_as_double(__clc_as_long(H) & 0xfffffffff8000000L);
double T = (resH - H + resT) + (resT_t - (resT + resT_h)) + (H - H_h);
H = H_h;
- double y_head = as_double(uy & 0xfffffffff8000000L);
+ double y_head = __clc_as_double(uy & 0xfffffffff8000000L);
double y_tail = y - y_head;
int mask_2_24 = ay > 0x4170000000000000; // 2^24
- int nyh = convert_int(y_head);
+ int nyh = __clc_convert_int(y_head);
int nyt = ny - nyh;
double y_tail1 = (double)nyt;
y_tail = mask_2_24 ? y_tail1 : y_tail;
@@ -344,7 +346,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
expv = fma(f, q, f2) + f1;
expv = ldexp(expv, m);
- expv = v > max_exp_arg ? as_double(0x7FF0000000000000L) : expv;
+ expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv;
expv = v < min_exp_arg ? 0.0 : expv;
}
@@ -357,7 +359,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
expv *= ((inty == 1) & !xpos) ? -1.0 : 1.0;
- long ret = as_long(expv);
+ long ret = __clc_as_long(expv);
// Now all the edge cases
long xinf = xpos ? PINFBITPATT_DP64 : NINFBITPATT_DP64;
@@ -378,7 +380,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
ret = ax > PINFBITPATT_DP64 ? ux : ret;
ret = ny == 0 ? 0x3ff0000000000000L : ret;
- return as_double(ret);
+ return __clc_as_double(ret);
}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_pown, double, int)
#endif
@@ -388,7 +390,7 @@ _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_pown, double, int)
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_OVERLOAD _CLC_DEF half __clc_pown(half x, int y) {
- return (half)__clc_pown((float)x, y);
+ return (half)__clc_pown((float)x, y);
}
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_pown, half, int);
diff --git a/libclc/generic/lib/math/clc_powr.cl b/libclc/generic/lib/math/clc_powr.cl
index 7e1a6f2a02e7a..7876acaee89a6 100644
--- a/libclc/generic/lib/math/clc_powr.cl
+++ b/libclc/generic/lib/math/clc_powr.cl
@@ -21,6 +21,7 @@
*/
#include <clc/clc.h>
+#include <clc/clc_convert.h>
#include <clc/clcmacro.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_mad.h>
@@ -65,17 +66,17 @@
// ((((expT * poly) + expT) + expH*poly) + expH)
_CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) {
- int ix = as_int(x);
+ int ix = __clc_as_int(x);
int ax = ix & EXSIGNBIT_SP32;
int xpos = ix == ax;
- int iy = as_int(y);
+ int iy = __clc_as_int(y);
int ay = iy & EXSIGNBIT_SP32;
int ypos = iy == ay;
// Extra precise log calculation
// First handle case that x is close to 1
- float r = 1.0f - as_float(ax);
+ float r = 1.0f - __clc_as_float(ax);
int near1 = __clc_fabs(r) < 0x1.0p-4f;
float r2 = r * r;
@@ -99,7 +100,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) {
// Computations for x not near 1
int m = (int)(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
float mf = (float)m;
- int ixs = as_int(as_float(ax | 0x3f800000) - 1.0f);
+ int ixs = __clc_as_int(__clc_as_float(ax | 0x3f800000) - 1.0f);
float mfs = (float)((ixs >> EXPSHIFTBITS_SP32) - 253);
int c = m == -127;
int ixn = c ? ixs : ax;
@@ -108,8 +109,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) {
int indx = (ixn & 0x007f0000) + ((ixn & 0x00008000) << 1);
// F - Y
- float f = as_float(0x3f000000 | indx) -
- as_float(0x3f000000 | (ixn & MANTBITS_SP32));
+ float f = __clc_as_float(0x3f000000 | indx) -
+ __clc_as_float(0x3f000000 | (ixn & MANTBITS_SP32));
indx = indx >> 16;
float2 tv = USE_TABLE(log_inv_tbl_ep, indx);
@@ -137,10 +138,10 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) {
lh = near1 ? lh_near1 : lh;
l = near1 ? l_near1 : l;
- float gh = as_float(as_int(l) & 0xfffff000);
+ float gh = __clc_as_float(__clc_as_int(l) & 0xfffff000);
float gt = ((ltt - (lt - lth)) + ((lh - l) + lt)) + (l - gh);
- float yh = as_float(iy & 0xfffff000);
+ float yh = __clc_as_float(iy & 0xfffff000);
float yt = y - yh;
@@ -151,7 +152,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) {
// Extra precise exp of ylogx
// 64/log2 : 92.332482616893657
const float R_64_BY_LOG2 = 0x1.715476p+6f;
- int n = convert_int(ylogx * R_64_BY_LOG2);
+ int n = __clc_convert_int(ylogx * R_64_BY_LOG2);
float nf = (float)n;
int j = n & 0x3f;
@@ -173,14 +174,14 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) {
float expylogx =
__clc_mad(tv.s0, poly, __clc_mad(tv.s1, poly, tv.s1)) + tv.s0;
- float sexpylogx = expylogx * as_float(0x1 << (m + 149));
- float texpylogx = as_float(as_int(expylogx) + m2);
+ float sexpylogx = expylogx * __clc_as_float(0x1 << (m + 149));
+ float texpylogx = __clc_as_float(__clc_as_int(expylogx) + m2);
expylogx = m < -125 ? sexpylogx : texpylogx;
// Result is +-Inf if (ylogx + ylogx_t) > 128*log2
expylogx = ((ylogx > 0x1.62e430p+6f) |
(ylogx == 0x1.62e430p+6f & ylogx_t > -0x1.05c610p-22f))
- ? as_float(PINFBITPATT_SP32)
+ ? __clc_as_float(PINFBITPATT_SP32)
: expylogx;
// Result is 0 if ylogx < -149*log2
@@ -199,9 +200,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) {
inty = yexp < 1 ? 0 : inty;
inty = yexp > 24 ? 2 : inty;
- float signval = as_float((as_uint(expylogx) ^ SIGNBIT_SP32));
+ float signval = __clc_as_float((__clc_as_uint(expylogx) ^ SIGNBIT_SP32));
expylogx = ((inty == 1) & !xpos) ? signval : expylogx;
- int ret = as_int(expylogx);
+ int ret = __clc_as_int(expylogx);
// Corner case handling
ret = ax < 0x3f800000 & iy == NINFBITPATT_SP32 ? PINFBITPATT_SP32 : ret;
@@ -223,7 +224,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y) {
ret = ax > PINFBITPATT_SP32 ? ix : ret;
ret = ay > PINFBITPATT_SP32 ? iy : ret;
- return as_float(ret);
+ return __clc_as_float(ret);
}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_powr, float, float)
@@ -232,11 +233,11 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
const double real_log2_tail = 5.76999904754328540596e-08;
const double real_log2_lead = 6.93147122859954833984e-01;
- long ux = as_long(x);
+ long ux = __clc_as_long(x);
long ax = ux & (~SIGNBIT_DP64);
int xpos = ax == ux;
- long uy = as_long(y);
+ long uy = __clc_as_long(y);
long ay = uy & (~SIGNBIT_DP64);
int ypos = ay == uy;
@@ -248,7 +249,8 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
double xexp = (double)exp;
long mantissa = ax & 0x000FFFFFFFFFFFFFL;
- long temp_ux = as_long(as_double(0x3ff0000000000000L | mantissa) - 1.0);
+ long temp_ux =
+ __clc_as_long(__clc_as_double(0x3ff0000000000000L | mantissa) - 1.0);
exp = ((temp_ux & 0x7FF0000000000000L) >> 52) - 2045;
double xexp1 = (double)exp;
long mantissa1 = temp_ux & 0x000FFFFFFFFFFFFFL;
@@ -260,14 +262,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
((mantissa & 0x0000080000000000) << 1);
int index = rax >> 44;
- double F = as_double(rax | 0x3FE0000000000000L);
- double Y = as_double(mantissa | 0x3FE0000000000000L);
+ double F = __clc_as_double(rax | 0x3FE0000000000000L);
+ double Y = __clc_as_double(mantissa | 0x3FE0000000000000L);
double f = F - Y;
double2 tv = USE_TABLE(log_f_inv_tbl, index);
double log_h = tv.s0;
double log_t = tv.s1;
double f_inv = (log_h + log_t) * f;
- double r1 = as_double(as_long(f_inv) & 0xfffffffff8000000L);
+ double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L);
double r2 = fma(-F, r1, f) * (log_h + log_t);
double r = r1 + r2;
@@ -291,11 +293,11 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
double resT_h = poly0h;
double H = resT + resH;
- double H_h = as_double(as_long(H) & 0xfffffffff8000000L);
+ double H_h = __clc_as_double(__clc_as_long(H) & 0xfffffffff8000000L);
double T = (resH - H + resT) + (resT_t - (resT + resT_h)) + (H - H_h);
H = H_h;
- double y_head = as_double(uy & 0xfffffffff8000000L);
+ double y_head = __clc_as_double(uy & 0xfffffffff8000000L);
double y_tail = y - y_head;
double temp = fma(y_tail, H, fma(y_head, T, y_tail * T));
@@ -341,7 +343,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
expv = fma(f, q, f2) + f1;
expv = ldexp(expv, m);
- expv = v > max_exp_arg ? as_double(0x7FF0000000000000L) : expv;
+ expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv;
expv = v < min_exp_arg ? 0.0 : expv;
}
@@ -363,7 +365,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
expv *= ((inty == 1) & !xpos) ? -1.0 : 1.0;
- long ret = as_long(expv);
+ long ret = __clc_as_long(expv);
// Now all the edge cases
ret = ax < 0x3ff0000000000000L & uy == NINFBITPATT_DP64 ? PINFBITPATT_DP64
@@ -389,7 +391,8 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
ret = ax > PINFBITPATT_DP64 ? ux : ret;
ret = ay > PINFBITPATT_DP64 ? uy : ret;
- return as_double(ret);
+ return __clc_as_double(ret);
}
-_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_powr, double, double)
+_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_powr, double,
+ double)
#endif
diff --git a/libclc/generic/lib/math/clc_remainder.cl b/libclc/generic/lib/math/clc_remainder.cl
index 31d17d5aaf6b6..6302b9776782f 100644
--- a/libclc/generic/lib/math/clc_remainder.cl
+++ b/libclc/generic/lib/math/clc_remainder.cl
@@ -21,6 +21,7 @@
*/
#include <clc/clc.h>
+#include <clc/clc_convert.h>
#include <clc/clcmacro.h>
#include <clc/integer/clc_clz.h>
#include <clc/math/clc_floor.h>
@@ -31,19 +32,19 @@
#include <math/clc_remainder.h>
_CLC_DEF _CLC_OVERLOAD float __clc_remainder(float x, float y) {
- int ux = as_int(x);
+ int ux = __clc_as_int(x);
int ax = ux & EXSIGNBIT_SP32;
- float xa = as_float(ax);
+ float xa = __clc_as_float(ax);
int sx = ux ^ ax;
int ex = ax >> EXPSHIFTBITS_SP32;
- int uy = as_int(y);
+ int uy = __clc_as_int(y);
int ay = uy & EXSIGNBIT_SP32;
- float ya = as_float(ay);
+ float ya = __clc_as_float(ay);
int ey = ay >> EXPSHIFTBITS_SP32;
- float xr = as_float(0x3f800000 | (ax & 0x007fffff));
- float yr = as_float(0x3f800000 | (ay & 0x007fffff));
+ float xr = __clc_as_float(0x3f800000 | (ax & 0x007fffff));
+ float yr = __clc_as_float(0x3f800000 | (ay & 0x007fffff));
int c;
int k = ex - ey;
@@ -71,17 +72,17 @@ _CLC_DEF _CLC_OVERLOAD float __clc_remainder(float x, float y) {
xr -= c ? yr : 0.0f;
q += c;
- float s = as_float(ey << EXPSHIFTBITS_SP32);
+ float s = __clc_as_float(ey << EXPSHIFTBITS_SP32);
xr *= lt ? 1.0f : s;
c = ax == ay;
xr = c ? 0.0f : xr;
- xr = as_float(sx ^ as_int(xr));
+ xr = __clc_as_float(sx ^ __clc_as_int(xr));
c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 |
ay == 0;
- xr = c ? as_float(QNANBITPATT_SP32) : xr;
+ xr = c ? __clc_as_float(QNANBITPATT_SP32) : xr;
return xr;
}
@@ -90,18 +91,18 @@ _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_remainder, float,
#ifdef cl_khr_fp64
_CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) {
- ulong ux = as_ulong(x);
+ ulong ux = __clc_as_ulong(x);
ulong ax = ux & ~SIGNBIT_DP64;
ulong xsgn = ux ^ ax;
- double dx = as_double(ax);
- int xexp = convert_int(ax >> EXPSHIFTBITS_DP64);
+ double dx = __clc_as_double(ax);
+ int xexp = __clc_convert_int(ax >> EXPSHIFTBITS_DP64);
int xexp1 = 11 - (int)__clc_clz(ax & MANTBITS_DP64);
xexp1 = xexp < 1 ? xexp1 : xexp;
- ulong uy = as_ulong(y);
+ ulong uy = __clc_as_ulong(y);
ulong ay = uy & ~SIGNBIT_DP64;
- double dy = as_double(ay);
- int yexp = convert_int(ay >> EXPSHIFTBITS_DP64);
+ double dy = __clc_as_double(ay);
+ int yexp = __clc_convert_int(ay >> EXPSHIFTBITS_DP64);
int yexp1 = 11 - (int)__clc_clz(ay & MANTBITS_DP64);
yexp1 = yexp < 1 ? yexp1 : yexp;
@@ -181,12 +182,12 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) {
dx = dy < 0x1.0p+1022 ? dxl : dxg;
- double ret = as_double(xsgn ^ as_ulong(dx));
- dx = as_double(ax);
+ double ret = __clc_as_double(xsgn ^ __clc_as_ulong(dx));
+ dx = __clc_as_double(ax);
// Now handle |x| == |y|
int c = dx == dy;
- t = as_double(xsgn);
+ t = __clc_as_double(xsgn);
ret = c ? t : ret;
// Next, handle |x| < |y|
@@ -203,7 +204,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) {
// |y| is 0
c = dy == 0.0;
- ret = c ? as_double(QNANBITPATT_DP64) : ret;
+ ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret;
// y is +-Inf, NaN
c = yexp > BIASEDEMAX_DP64;
@@ -212,7 +213,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) {
// x is +=Inf, NaN
c = xexp > BIASEDEMAX_DP64;
- ret = c ? as_double(QNANBITPATT_DP64) : ret;
+ ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret;
return ret;
}
diff --git a/libclc/generic/lib/math/clc_remquo.cl b/libclc/generic/lib/math/clc_remquo.cl
index af3e7a2b07500..699517e180708 100644
--- a/libclc/generic/lib/math/clc_remquo.cl
+++ b/libclc/generic/lib/math/clc_remquo.cl
@@ -21,6 +21,7 @@
*/
#include <clc/clc.h>
+#include <clc/clc_convert.h>
#include <clc/clcmacro.h>
#include <clc/integer/clc_clz.h>
#include <clc/math/clc_floor.h>
@@ -34,20 +35,20 @@ _CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y,
__private int *quo) {
x = __clc_flush_denormal_if_not_supported(x);
y = __clc_flush_denormal_if_not_supported(y);
- int ux = as_int(x);
+ int ux = __clc_as_int(x);
int ax = ux & EXSIGNBIT_SP32;
- float xa = as_float(ax);
+ float xa = __clc_as_float(ax);
int sx = ux ^ ax;
int ex = ax >> EXPSHIFTBITS_SP32;
- int uy = as_int(y);
+ int uy = __clc_as_int(y);
int ay = uy & EXSIGNBIT_SP32;
- float ya = as_float(ay);
+ float ya = __clc_as_float(ay);
int sy = uy ^ ay;
int ey = ay >> EXPSHIFTBITS_SP32;
- float xr = as_float(0x3f800000 | (ax & 0x007fffff));
- float yr = as_float(0x3f800000 | (ay & 0x007fffff));
+ float xr = __clc_as_float(0x3f800000 | (ax & 0x007fffff));
+ float yr = __clc_as_float(0x3f800000 | (ay & 0x007fffff));
int c;
int k = ex - ey;
@@ -75,7 +76,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y,
xr -= c ? yr : 0.0f;
q += c;
- float s = as_float(ey << EXPSHIFTBITS_SP32);
+ float s = __clc_as_float(ey << EXPSHIFTBITS_SP32);
xr *= lt ? 1.0f : s;
int qsgn = sx == sy ? 1 : -1;
@@ -85,12 +86,12 @@ _CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y,
quot = c ? qsgn : quot;
xr = c ? 0.0f : xr;
- xr = as_float(sx ^ as_int(xr));
+ xr = __clc_as_float(sx ^ __clc_as_int(xr));
c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 |
ay == 0;
quot = c ? 0 : quot;
- xr = c ? as_float(QNANBITPATT_SP32) : xr;
+ xr = c ? __clc_as_float(QNANBITPATT_SP32) : xr;
*quo = quot;
@@ -130,18 +131,18 @@ __VEC_REMQUO(float, 16, 8)
#ifdef cl_khr_fp64
_CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y,
__private int *pquo) {
- ulong ux = as_ulong(x);
+ ulong ux = __clc_as_ulong(x);
ulong ax = ux & ~SIGNBIT_DP64;
ulong xsgn = ux ^ ax;
- double dx = as_double(ax);
- int xexp = convert_int(ax >> EXPSHIFTBITS_DP64);
+ double dx = __clc_as_double(ax);
+ int xexp = __clc_convert_int(ax >> EXPSHIFTBITS_DP64);
int xexp1 = 11 - (int)__clc_clz(ax & MANTBITS_DP64);
xexp1 = xexp < 1 ? xexp1 : xexp;
- ulong uy = as_ulong(y);
+ ulong uy = __clc_as_ulong(y);
ulong ay = uy & ~SIGNBIT_DP64;
- double dy = as_double(ay);
- int yexp = convert_int(ay >> EXPSHIFTBITS_DP64);
+ double dy = __clc_as_double(ay);
+ int yexp = __clc_convert_int(ay >> EXPSHIFTBITS_DP64);
int yexp1 = 11 - (int)__clc_clz(ay & MANTBITS_DP64);
yexp1 = yexp < 1 ? yexp1 : yexp;
@@ -223,12 +224,12 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y,
lt += dy < 0x1.0p+1022 ? al : ag;
int quo = ((int)lt & 0x7f) * qsgn;
- double ret = as_double(xsgn ^ as_ulong(dx));
- dx = as_double(ax);
+ double ret = __clc_as_double(xsgn ^ __clc_as_ulong(dx));
+ dx = __clc_as_double(ax);
// Now handle |x| == |y|
int c = dx == dy;
- t = as_double(xsgn);
+ t = __clc_as_double(xsgn);
quo = c ? qsgn : quo;
ret = c ? t : ret;
@@ -249,7 +250,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y,
// |y| is 0
c = dy == 0.0;
quo = c ? 0 : quo;
- ret = c ? as_double(QNANBITPATT_DP64) : ret;
+ ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret;
// y is +-Inf, NaN
c = yexp > BIASEDEMAX_DP64;
@@ -260,7 +261,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y,
// x is +=Inf, NaN
c = xexp > BIASEDEMAX_DP64;
quo = c ? 0 : quo;
- ret = c ? as_double(QNANBITPATT_DP64) : ret;
+ ret = c ? __clc_as_double(QNANBITPATT_DP64) : ret;
*pquo = quo;
return ret;
diff --git a/libclc/generic/lib/math/clc_rootn.cl b/libclc/generic/lib/math/clc_rootn.cl
index 42b983784c14d..dabaa2a4f3f2a 100644
--- a/libclc/generic/lib/math/clc_rootn.cl
+++ b/libclc/generic/lib/math/clc_rootn.cl
@@ -21,6 +21,7 @@
*/
#include <clc/clc.h>
+#include <clc/clc_convert.h>
#include <clc/clcmacro.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_mad.h>
@@ -67,17 +68,17 @@
_CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) {
float y = MATH_RECIP((float)ny);
- int ix = as_int(x);
+ int ix = __clc_as_int(x);
int ax = ix & EXSIGNBIT_SP32;
int xpos = ix == ax;
- int iy = as_int(y);
+ int iy = __clc_as_int(y);
int ay = iy & EXSIGNBIT_SP32;
int ypos = iy == ay;
// Extra precise log calculation
// First handle case that x is close to 1
- float r = 1.0f - as_float(ax);
+ float r = 1.0f - __clc_as_float(ax);
int near1 = __clc_fabs(r) < 0x1.0p-4f;
float r2 = r * r;
@@ -101,7 +102,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) {
// Computations for x not near 1
int m = (int)(ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
float mf = (float)m;
- int ixs = as_int(as_float(ax | 0x3f800000) - 1.0f);
+ int ixs = __clc_as_int(__clc_as_float(ax | 0x3f800000) - 1.0f);
float mfs = (float)((ixs >> EXPSHIFTBITS_SP32) - 253);
int c = m == -127;
int ixn = c ? ixs : ax;
@@ -110,8 +111,8 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) {
int indx = (ixn & 0x007f0000) + ((ixn & 0x00008000) << 1);
// F - Y
- float f = as_float(0x3f000000 | indx) -
- as_float(0x3f000000 | (ixn & MANTBITS_SP32));
+ float f = __clc_as_float(0x3f000000 | indx) -
+ __clc_as_float(0x3f000000 | (ixn & MANTBITS_SP32));
indx = indx >> 16;
float2 tv = USE_TABLE(log_inv_tbl_ep, indx);
@@ -139,13 +140,13 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) {
lh = near1 ? lh_near1 : lh;
l = near1 ? l_near1 : l;
- float gh = as_float(as_int(l) & 0xfffff000);
+ float gh = __clc_as_float(__clc_as_int(l) & 0xfffff000);
float gt = ((ltt - (lt - lth)) + ((lh - l) + lt)) + (l - gh);
- float yh = as_float(iy & 0xfffff000);
+ float yh = __clc_as_float(iy & 0xfffff000);
float fny = (float)ny;
- float fnyh = as_float(as_int(fny) & 0xfffff000);
+ float fnyh = __clc_as_float(__clc_as_int(fny) & 0xfffff000);
float fnyt = (float)(ny - (int)fnyh);
float yt = MATH_DIVIDE(__clc_mad(-fnyt, yh, __clc_mad(-fnyh, yh, 1.0f)), fny);
@@ -155,7 +156,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) {
// Extra precise exp of ylogx
const float R_64_BY_LOG2 = 0x1.715476p+6f; // 64/log2 : 92.332482616893657
- int n = convert_int(ylogx * R_64_BY_LOG2);
+ int n = __clc_convert_int(ylogx * R_64_BY_LOG2);
float nf = (float)n;
int j = n & 0x3f;
@@ -179,16 +180,16 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) {
float expylogx =
__clc_mad(tv.s0, poly, __clc_mad(tv.s1, poly, tv.s1)) + tv.s0;
float sexpylogx = __clc_fp32_subnormals_supported()
- ? expylogx * as_float(0x1 << (m + 149))
+ ? expylogx * __clc_as_float(0x1 << (m + 149))
: 0.0f;
- float texpylogx = as_float(as_int(expylogx) + m2);
+ float texpylogx = __clc_as_float(__clc_as_int(expylogx) + m2);
expylogx = m < -125 ? sexpylogx : texpylogx;
// Result is +-Inf if (ylogx + ylogx_t) > 128*log2
expylogx = ((ylogx > 0x1.62e430p+6f) |
(ylogx == 0x1.62e430p+6f & ylogx_t > -0x1.05c610p-22f))
- ? as_float(PINFBITPATT_SP32)
+ ? __clc_as_float(PINFBITPATT_SP32)
: expylogx;
// Result is 0 if ylogx < -149*log2
@@ -201,9 +202,9 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) {
int inty = 2 - (ny & 1);
- float signval = as_float((as_uint(expylogx) ^ SIGNBIT_SP32));
+ float signval = __clc_as_float((__clc_as_uint(expylogx) ^ SIGNBIT_SP32));
expylogx = ((inty == 1) & !xpos) ? signval : expylogx;
- int ret = as_int(expylogx);
+ int ret = __clc_as_int(expylogx);
// Corner case handling
ret = (!xpos & (inty == 2)) ? QNANBITPATT_SP32 : ret;
@@ -221,7 +222,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny) {
ret = ax > PINFBITPATT_SP32 ? ix : ret;
ret = ny == 0 ? QNANBITPATT_SP32 : ret;
- return as_float(ret);
+ return __clc_as_float(ret);
}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_rootn, float, int)
@@ -233,11 +234,11 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) {
double dny = (double)ny;
double y = 1.0 / dny;
- long ux = as_long(x);
+ long ux = __clc_as_long(x);
long ax = ux & (~SIGNBIT_DP64);
int xpos = ax == ux;
- long uy = as_long(y);
+ long uy = __clc_as_long(y);
long ay = uy & (~SIGNBIT_DP64);
int ypos = ay == uy;
@@ -249,7 +250,8 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) {
double xexp = (double)exp;
long mantissa = ax & 0x000FFFFFFFFFFFFFL;
- long temp_ux = as_long(as_double(0x3ff0000000000000L | mantissa) - 1.0);
+ long temp_ux =
+ __clc_as_long(__clc_as_double(0x3ff0000000000000L | mantissa) - 1.0);
exp = ((temp_ux & 0x7FF0000000000000L) >> 52) - 2045;
double xexp1 = (double)exp;
long mantissa1 = temp_ux & 0x000FFFFFFFFFFFFFL;
@@ -261,14 +263,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) {
((mantissa & 0x0000080000000000) << 1);
int index = rax >> 44;
- double F = as_double(rax | 0x3FE0000000000000L);
- double Y = as_double(mantissa | 0x3FE0000000000000L);
+ double F = __clc_as_double(rax | 0x3FE0000000000000L);
+ double Y = __clc_as_double(mantissa | 0x3FE0000000000000L);
double f = F - Y;
double2 tv = USE_TABLE(log_f_inv_tbl, index);
double log_h = tv.s0;
double log_t = tv.s1;
double f_inv = (log_h + log_t) * f;
- double r1 = as_double(as_long(f_inv) & 0xfffffffff8000000L);
+ double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L);
double r2 = fma(-F, r1, f) * (log_h + log_t);
double r = r1 + r2;
@@ -292,14 +294,14 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) {
double resT_h = poly0h;
double H = resT + resH;
- double H_h = as_double(as_long(H) & 0xfffffffff8000000L);
+ double H_h = __clc_as_double(__clc_as_long(H) & 0xfffffffff8000000L);
double T = (resH - H + resT) + (resT_t - (resT + resT_h)) + (H - H_h);
H = H_h;
- double y_head = as_double(uy & 0xfffffffff8000000L);
+ double y_head = __clc_as_double(uy & 0xfffffffff8000000L);
double y_tail = y - y_head;
- double fnyh = as_double(as_long(dny) & 0xfffffffffff00000);
+ double fnyh = __clc_as_double(__clc_as_long(dny) & 0xfffffffffff00000);
double fnyt = (double)(ny - (int)fnyh);
y_tail = fma(-fnyt, y_head, fma(-fnyh, y_head, 1.0)) / dny;
@@ -346,7 +348,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) {
expv = fma(f, q, f2) + f1;
expv = ldexp(expv, m);
- expv = v > max_exp_arg ? as_double(0x7FF0000000000000L) : expv;
+ expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv;
expv = v < min_exp_arg ? 0.0 : expv;
}
@@ -359,7 +361,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) {
expv *= ((inty == 1) & !xpos) ? -1.0 : 1.0;
- long ret = as_long(expv);
+ long ret = __clc_as_long(expv);
// Now all the edge cases
ret = (!xpos & (inty == 2)) ? QNANBITPATT_DP64 : ret;
@@ -377,7 +379,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rootn(double x, int ny) {
ret = ((ux == PINFBITPATT_DP64) & ypos) ? PINFBITPATT_DP64 : ret;
ret = ax > PINFBITPATT_DP64 ? ux : ret;
ret = ny == 0 ? QNANBITPATT_DP64 : ret;
- return as_double(ret);
+ return __clc_as_double(ret);
}
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_rootn, double, int)
#endif
@@ -387,7 +389,7 @@ _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_rootn, double, int)
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_OVERLOAD _CLC_DEF half __clc_rootn(half x, int y) {
- return (half)__clc_rootn((float)x, y);
+ return (half)__clc_rootn((float)x, y);
}
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_rootn, half, int);
More information about the cfe-commits
mailing list