[Libclc-dev] [PATCH 1/2] r600/fmax: Flush denormals before calling builtin.
Jan Vesely via Libclc-dev
libclc-dev at lists.llvm.org
Mon May 21 08:36:46 PDT 2018
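Same reason as for amdgcn: when denormals are not enabled, the fmax instruction
flushes the values for comparison but outputs the original denormal.
Fixes the fmax and maxmag CTS tests on Turks.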
Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
---
r600/lib/SOURCES | 1 +
r600/lib/math/fmax.cl | 29 +++++++++++++++++++++++++++++
2 files changed, 30 insertions(+)
create mode 100644 r600/lib/math/fmax.cl
diff --git a/r600/lib/SOURCES b/r600/lib/SOURCES
index 284052c..132b151 100644
--- a/r600/lib/SOURCES
+++ b/r600/lib/SOURCES
@@ -1,3 +1,4 @@
+math/fmax.cl
synchronization/barrier_impl.ll
workitem/get_global_offset.cl
workitem/get_group_id.cl
diff --git a/r600/lib/math/fmax.cl b/r600/lib/math/fmax.cl
new file mode 100644
index 0000000..e4b9e4c
--- /dev/null
+++ b/r600/lib/math/fmax.cl
@@ -0,0 +1,29 @@
+#include <clc/clc.h>
+
+#include "../../../generic/lib/clcmacro.h"
+#include "../../../generic/lib/math/math.h"
+
+_CLC_DEF _CLC_OVERLOAD float fmax(float x, float y)
+{
+ /* Flush denormal inputs when denormals are not enabled; otherwise the fmax
+ * instruction compares the flushed values but outputs the original denormal. */
+ x = __clc_flush_denormal_if_not_supported(x);
+ y = __clc_flush_denormal_if_not_supported(y);
+ return __builtin_fmaxf(x, y);
+}
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, fmax, float, float)
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DEF _CLC_OVERLOAD double fmax(double x, double y)
+{
+ return __builtin_fmax(x, y); /* passed straight to the builtin; unlike the float overload, inputs are not flushed */
+}
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmax, double, double)
+
+#endif
+
+#define __CLC_BODY <../../../generic/lib/math/fmax.inc>
+#include <clc/math/gentype.inc>
--
2.17.0
More information about the Libclc-dev mailing list