[libclc] Optimize CLC vector is(un)ordered builtins (PR #124546)
Fraser Cormack via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 27 06:03:13 PST 2025
https://github.com/frasercrmck created https://github.com/llvm/llvm-project/pull/124546
These are similar to 347fb208, but these builtins are expressed in terms of other builtins. The LLVM IR generated features the same fcmp ord/uno comparisons as before, but consistently in vector form.
>From 7ceda5f4333133a0dd195d8d84bc30a08b5b9743 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Mon, 27 Jan 2025 13:59:49 +0000
Subject: [PATCH] [libclc] Optimize CLC vector is(un)ordered builtins
These are similar to 347fb208, but these builtins are expressed in terms
of other builtins. The LLVM IR generated features the same fcmp ord/uno
comparisons as before, but consistently in vector form.
---
.../clc/include/clc/relational/relational.h | 79 -------------------
.../lib/generic/relational/clc_isordered.cl | 22 +++---
.../lib/generic/relational/clc_isunordered.cl | 26 +++---
3 files changed, 18 insertions(+), 109 deletions(-)
diff --git a/libclc/clc/include/clc/relational/relational.h b/libclc/clc/include/clc/relational/relational.h
index f32e7630203e4b..f269715cfc83c9 100644
--- a/libclc/clc/include/clc/relational/relational.h
+++ b/libclc/clc/include/clc/relational/relational.h
@@ -63,85 +63,6 @@
ARG_TYPE) \
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, FUNCTION, ARG_TYPE)
-#define _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, \
- ARG0_TYPE, ARG1_TYPE) \
- _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
- return BUILTIN_NAME(x, y); \
- }
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC(RET_TYPE, FUNCTION, ARG0_TYPE, \
- ARG1_TYPE) \
- _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
- return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo, y.lo), \
- FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \
- }
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE, FUNCTION, ARG0_TYPE, \
- ARG1_TYPE) \
- _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
- return (RET_TYPE)((RET_TYPE){FUNCTION(x.lo, y.lo), \
- FUNCTION(x.hi, y.hi)} != (RET_TYPE)0); \
- }
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE, FUNCTION, ARG0_TYPE, \
- ARG1_TYPE) \
- _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
- return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
- FUNCTION(x.s2, y.s2)} != (RET_TYPE)0); \
- }
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE, FUNCTION, ARG0_TYPE, \
- ARG1_TYPE) \
- _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
- return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
- FUNCTION(x.s2, y.s2), \
- FUNCTION(x.s3, y.s3)} != (RET_TYPE)0); \
- }
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE, FUNCTION, ARG0_TYPE, \
- ARG1_TYPE) \
- _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
- return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
- FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \
- FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \
- FUNCTION(x.s6, y.s6), \
- FUNCTION(x.s7, y.s7)} != (RET_TYPE)0); \
- }
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE, FUNCTION, ARG0_TYPE, \
- ARG1_TYPE) \
- _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG0_TYPE x, ARG1_TYPE y) { \
- return (RET_TYPE)((RET_TYPE){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
- FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3), \
- FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5), \
- FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), \
- FUNCTION(x.s8, y.s8), FUNCTION(x.s9, y.s9), \
- FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb), \
- FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), \
- FUNCTION(x.se, y.se), \
- FUNCTION(x.sf, y.sf)} != (RET_TYPE)0); \
- }
-
-#define _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \
- ARG1_TYPE) \
- _CLC_DEFINE_RELATIONAL_BINARY_VEC2(RET_TYPE##2, FUNCTION, ARG0_TYPE##2, \
- ARG1_TYPE##2) \
- _CLC_DEFINE_RELATIONAL_BINARY_VEC3(RET_TYPE##3, FUNCTION, ARG0_TYPE##3, \
- ARG1_TYPE##3) \
- _CLC_DEFINE_RELATIONAL_BINARY_VEC4(RET_TYPE##4, FUNCTION, ARG0_TYPE##4, \
- ARG1_TYPE##4) \
- _CLC_DEFINE_RELATIONAL_BINARY_VEC8(RET_TYPE##8, FUNCTION, ARG0_TYPE##8, \
- ARG1_TYPE##8) \
- _CLC_DEFINE_RELATIONAL_BINARY_VEC16(RET_TYPE##16, FUNCTION, ARG0_TYPE##16, \
- ARG1_TYPE##16)
-
-#define _CLC_DEFINE_RELATIONAL_BINARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \
- ARG0_TYPE, ARG1_TYPE) \
- _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, \
- ARG0_TYPE, ARG1_TYPE) \
- _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \
- ARG1_TYPE)
-
#define _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(RET_TYPE, RET_TYPE_VEC, FUNCTION, \
ARG1_TYPE, ARG2_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
diff --git a/libclc/clc/lib/generic/relational/clc_isordered.cl b/libclc/clc/lib/generic/relational/clc_isordered.cl
index 6183d1ddf918f5..73cd96a0a56ed8 100644
--- a/libclc/clc/lib/generic/relational/clc_isordered.cl
+++ b/libclc/clc/lib/generic/relational/clc_isordered.cl
@@ -2,33 +2,29 @@
#include <clc/relational/clc_isequal.h>
#include <clc/relational/relational.h>
-#define _CLC_DEFINE_ISORDERED(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
- _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
- return __clc_isequal(x, x) && __clc_isequal(y, y); \
- }
+#define _CLC_RELATIONAL_OP(X, Y) \
+ __clc_isequal((X), (X)) && __clc_isequal((Y), (Y))
-_CLC_DEFINE_ISORDERED(int, __clc_isordered, float, float)
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, __clc_isordered, float, float)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isordered, float, float)
#ifdef cl_khr_fp64
+
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// The scalar version of __clc_isordered(double, double) returns an int, but the
// vector versions return long.
-
-_CLC_DEFINE_ISORDERED(int, __clc_isordered, double, double)
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isordered, double, double)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isordered, double, double)
#endif
+
#ifdef cl_khr_fp16
+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// The scalar version of __clc_isordered(half, half) returns an int, but the
// vector versions return short.
-
-_CLC_DEFINE_ISORDERED(int, __clc_isordered, half, half)
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isordered, half, half)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isordered, half, half)
#endif
-#undef _CLC_DEFINE_ISORDERED
+#undef _CLC_RELATIONAL_OP
diff --git a/libclc/clc/lib/generic/relational/clc_isunordered.cl b/libclc/clc/lib/generic/relational/clc_isunordered.cl
index dbbec031a65e58..fefda8e5675176 100644
--- a/libclc/clc/lib/generic/relational/clc_isunordered.cl
+++ b/libclc/clc/lib/generic/relational/clc_isunordered.cl
@@ -1,12 +1,11 @@
#include <clc/internal/clc.h>
+#include <clc/relational/clc_isequal.h>
#include <clc/relational/relational.h>
-// Note: It would be nice to use __builtin_isunordered with vector inputs, but
-// it seems to only take scalar values as input, which will produce incorrect
-// output for vector input types.
+#define _CLC_RELATIONAL_OP(X, Y) \
+ !__clc_isequal((X), (X)) || !__clc_isequal((Y), (Y))
-_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isunordered, __builtin_isunordered,
- float, float)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isunordered, float, float)
#ifdef cl_khr_fp64
@@ -14,25 +13,18 @@ _CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isunordered, __builtin_isunordered,
// The scalar version of __clc_isunordered(double, double) returns an int, but
// the vector versions return long.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_isunordered(double x, double y) {
- return __builtin_isunordered(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isunordered, double, double)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isunordered, double, double)
#endif
+
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// The scalar version of __clc_isunordered(half, half) returns an int, but the
// vector versions return short.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_isunordered(half x, half y) {
- return __builtin_isunordered(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isunordered, half, half)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isunordered, half, half)
#endif
+
+#undef _CLC_RELATIONAL_OP
More information about the cfe-commits mailing list