[libclc] c3a0fcc - [libclc] Optimize CLC vector any/all builtins (#124568)

via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 27 08:37:24 PST 2025


Author: Fraser Cormack
Date: 2025-01-27T16:37:21Z
New Revision: c3a0fcc982061f9a69cfc1199dc91bd1fc3158c0

URL: https://github.com/llvm/llvm-project/commit/c3a0fcc982061f9a69cfc1199dc91bd1fc3158c0
DIFF: https://github.com/llvm/llvm-project/commit/c3a0fcc982061f9a69cfc1199dc91bd1fc3158c0.diff

LOG: [libclc] Optimize CLC vector any/all builtins (#124568)

By using the vector reduction builtins we can avoid scalarization.
Targets that don't support vector reductions will scalarize later on
anyway. The vector reduction builtins should be well-enough supported by
the middle-end to be a generic solution.

This produces conceptually equivalent code: all vector elements are
OR'd/AND'd together and the final scalar is bit-shifted and masked to
produce the final result.

The 'normalize' builtin uses 'all' so its code has similarly improved in
places.

Added: 
    

Modified: 
    libclc/clc/lib/generic/relational/clc_all.cl
    libclc/clc/lib/generic/relational/clc_any.cl

Removed: 
    


################################################################################
diff  --git a/libclc/clc/lib/generic/relational/clc_all.cl b/libclc/clc/lib/generic/relational/clc_all.cl
index e371126d144f06..5193f0f743179d 100644
--- a/libclc/clc/lib/generic/relational/clc_all.cl
+++ b/libclc/clc/lib/generic/relational/clc_all.cl
@@ -1,28 +1,21 @@
 #include <clc/internal/clc.h>
 
 #define _CLC_ALL(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1)
-#define _CLC_ALL2(v) (_CLC_ALL((v).s0) & _CLC_ALL((v).s1))
-#define _CLC_ALL3(v) (_CLC_ALL2((v)) & _CLC_ALL((v).s2))
-#define _CLC_ALL4(v) (_CLC_ALL3((v)) & _CLC_ALL((v).s3))
-#define _CLC_ALL8(v)                                                           \
-  (_CLC_ALL4((v)) & _CLC_ALL((v).s4) & _CLC_ALL((v).s5) & _CLC_ALL((v).s6) &   \
-   _CLC_ALL((v).s7))
-#define _CLC_ALL16(v)                                                          \
-  (_CLC_ALL8((v)) & _CLC_ALL((v).s8) & _CLC_ALL((v).s9) & _CLC_ALL((v).sA) &   \
-   _CLC_ALL((v).sB) & _CLC_ALL((v).sC) & _CLC_ALL((v).sD) & _CLC_ALL((v).sE) & \
-   _CLC_ALL((v).sf))
 
-#define ALL_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v)
+#define _CLC_ALL_VEC(TYPE)                                                     \
+  _CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v) {                               \
+    return _CLC_ALL(__builtin_reduce_and(v));                                  \
+  }
 
-#define ALL_VECTORIZE(TYPE)                                                    \
-  ALL_ID(TYPE) { return _CLC_ALL(v); }                                         \
-  ALL_ID(TYPE##2) { return _CLC_ALL2(v); }                                     \
-  ALL_ID(TYPE##3) { return _CLC_ALL3(v); }                                     \
-  ALL_ID(TYPE##4) { return _CLC_ALL4(v); }                                     \
-  ALL_ID(TYPE##8) { return _CLC_ALL8(v); }                                     \
-  ALL_ID(TYPE##16) { return _CLC_ALL16(v); }
+#define _CLC_DEFINE_ALL(TYPE)                                                  \
+  _CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v) { return _CLC_ALL(v); }         \
+  _CLC_ALL_VEC(TYPE##2)                                                        \
+  _CLC_ALL_VEC(TYPE##3)                                                        \
+  _CLC_ALL_VEC(TYPE##4)                                                        \
+  _CLC_ALL_VEC(TYPE##8)                                                        \
+  _CLC_ALL_VEC(TYPE##16)
 
-ALL_VECTORIZE(char)
-ALL_VECTORIZE(short)
-ALL_VECTORIZE(int)
-ALL_VECTORIZE(long)
+_CLC_DEFINE_ALL(char)
+_CLC_DEFINE_ALL(short)
+_CLC_DEFINE_ALL(int)
+_CLC_DEFINE_ALL(long)

diff  --git a/libclc/clc/lib/generic/relational/clc_any.cl b/libclc/clc/lib/generic/relational/clc_any.cl
index e69f2113c94f55..e86bafaac19933 100644
--- a/libclc/clc/lib/generic/relational/clc_any.cl
+++ b/libclc/clc/lib/generic/relational/clc_any.cl
@@ -1,28 +1,21 @@
 #include <clc/internal/clc.h>
 
 #define _CLC_ANY(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1)
-#define _CLC_ANY2(v) (_CLC_ANY((v).s0) | _CLC_ANY((v).s1))
-#define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2))
-#define _CLC_ANY4(v) (_CLC_ANY3((v)) | _CLC_ANY((v).s3))
-#define _CLC_ANY8(v)                                                           \
-  (_CLC_ANY4((v)) | _CLC_ANY((v).s4) | _CLC_ANY((v).s5) | _CLC_ANY((v).s6) |   \
-   _CLC_ANY((v).s7))
-#define _CLC_ANY16(v)                                                          \
-  (_CLC_ANY8((v)) | _CLC_ANY((v).s8) | _CLC_ANY((v).s9) | _CLC_ANY((v).sA) |   \
-   _CLC_ANY((v).sB) | _CLC_ANY((v).sC) | _CLC_ANY((v).sD) | _CLC_ANY((v).sE) | \
-   _CLC_ANY((v).sf))
 
-#define ANY_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v)
+#define _CLC_ANY_VEC(TYPE)                                                     \
+  _CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v) {                               \
+    return _CLC_ANY(__builtin_reduce_or(v));                                   \
+  }
 
-#define ANY_VECTORIZE(TYPE)                                                    \
-  ANY_ID(TYPE) { return _CLC_ANY(v); }                                         \
-  ANY_ID(TYPE##2) { return _CLC_ANY2(v); }                                     \
-  ANY_ID(TYPE##3) { return _CLC_ANY3(v); }                                     \
-  ANY_ID(TYPE##4) { return _CLC_ANY4(v); }                                     \
-  ANY_ID(TYPE##8) { return _CLC_ANY8(v); }                                     \
-  ANY_ID(TYPE##16) { return _CLC_ANY16(v); }
+#define _CLC_DEFINE_ANY(TYPE)                                                  \
+  _CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v) { return _CLC_ANY(v); }         \
+  _CLC_ANY_VEC(TYPE##2)                                                        \
+  _CLC_ANY_VEC(TYPE##3)                                                        \
+  _CLC_ANY_VEC(TYPE##4)                                                        \
+  _CLC_ANY_VEC(TYPE##8)                                                        \
+  _CLC_ANY_VEC(TYPE##16)
 
-ANY_VECTORIZE(char)
-ANY_VECTORIZE(short)
-ANY_VECTORIZE(int)
-ANY_VECTORIZE(long)
+_CLC_DEFINE_ANY(char)
+_CLC_DEFINE_ANY(short)
+_CLC_DEFINE_ANY(int)
+_CLC_DEFINE_ANY(long)


        


More information about the cfe-commits mailing list