[libclc] [libclc] Optimize CLC vector any/all builtins (PR #124568)

Fraser Cormack via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 27 07:36:23 PST 2025


https://github.com/frasercrmck created https://github.com/llvm/llvm-project/pull/124568

By using the vector reduction builtins we can avoid scalarization. Targets that don't support vector reductions will scalarize later on anyway. The vector reduction builtins should be well-enough supported by the middle-end to be a generic solution.

This produces conceptually equivalent code: all vector elements are OR'd/AND'd together and the final scalar is bit-shifted and masked to produce the final result.

The 'normalize' builtin uses 'all' so its code has similarly improved in places.

From 2d7e3ce1152011c1140ea3b6ef7cfe8158e58e96 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser at codeplay.com>
Date: Mon, 27 Jan 2025 15:29:57 +0000
Subject: [PATCH] [libclc] Optimize CLC vector any/all builtins

By using the vector reduction builtins we can avoid scalarization.
Targets that don't support vector reductions will scalarize later on
anyway. The vector reduction builtins should be well-enough supported
by the middle-end to be a generic solution.

This produces conceptually equivalent code: all vector elements are
OR'd/AND'd together and the final scalar is bit-shifted and masked to
produce the final result.

The 'normalize' builtin uses 'all' so its code has similarly improved in
places.
---
 libclc/clc/lib/generic/relational/clc_all.cl | 37 ++++++++------------
 libclc/clc/lib/generic/relational/clc_any.cl | 37 ++++++++------------
 2 files changed, 30 insertions(+), 44 deletions(-)

diff --git a/libclc/clc/lib/generic/relational/clc_all.cl b/libclc/clc/lib/generic/relational/clc_all.cl
index e371126d144f06..5193f0f743179d 100644
--- a/libclc/clc/lib/generic/relational/clc_all.cl
+++ b/libclc/clc/lib/generic/relational/clc_all.cl
@@ -1,28 +1,21 @@
 #include <clc/internal/clc.h>
 
 #define _CLC_ALL(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1)
-#define _CLC_ALL2(v) (_CLC_ALL((v).s0) & _CLC_ALL((v).s1))
-#define _CLC_ALL3(v) (_CLC_ALL2((v)) & _CLC_ALL((v).s2))
-#define _CLC_ALL4(v) (_CLC_ALL3((v)) & _CLC_ALL((v).s3))
-#define _CLC_ALL8(v)                                                           \
-  (_CLC_ALL4((v)) & _CLC_ALL((v).s4) & _CLC_ALL((v).s5) & _CLC_ALL((v).s6) &   \
-   _CLC_ALL((v).s7))
-#define _CLC_ALL16(v)                                                          \
-  (_CLC_ALL8((v)) & _CLC_ALL((v).s8) & _CLC_ALL((v).s9) & _CLC_ALL((v).sA) &   \
-   _CLC_ALL((v).sB) & _CLC_ALL((v).sC) & _CLC_ALL((v).sD) & _CLC_ALL((v).sE) & \
-   _CLC_ALL((v).sf))
 
-#define ALL_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v)
+#define _CLC_ALL_VEC(TYPE)                                                     \
+  _CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v) {                               \
+    return _CLC_ALL(__builtin_reduce_and(v));                                  \
+  }
 
-#define ALL_VECTORIZE(TYPE)                                                    \
-  ALL_ID(TYPE) { return _CLC_ALL(v); }                                         \
-  ALL_ID(TYPE##2) { return _CLC_ALL2(v); }                                     \
-  ALL_ID(TYPE##3) { return _CLC_ALL3(v); }                                     \
-  ALL_ID(TYPE##4) { return _CLC_ALL4(v); }                                     \
-  ALL_ID(TYPE##8) { return _CLC_ALL8(v); }                                     \
-  ALL_ID(TYPE##16) { return _CLC_ALL16(v); }
+#define _CLC_DEFINE_ALL(TYPE)                                                  \
+  _CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v) { return _CLC_ALL(v); }         \
+  _CLC_ALL_VEC(TYPE##2)                                                        \
+  _CLC_ALL_VEC(TYPE##3)                                                        \
+  _CLC_ALL_VEC(TYPE##4)                                                        \
+  _CLC_ALL_VEC(TYPE##8)                                                        \
+  _CLC_ALL_VEC(TYPE##16)
 
-ALL_VECTORIZE(char)
-ALL_VECTORIZE(short)
-ALL_VECTORIZE(int)
-ALL_VECTORIZE(long)
+_CLC_DEFINE_ALL(char)
+_CLC_DEFINE_ALL(short)
+_CLC_DEFINE_ALL(int)
+_CLC_DEFINE_ALL(long)
diff --git a/libclc/clc/lib/generic/relational/clc_any.cl b/libclc/clc/lib/generic/relational/clc_any.cl
index e69f2113c94f55..e86bafaac19933 100644
--- a/libclc/clc/lib/generic/relational/clc_any.cl
+++ b/libclc/clc/lib/generic/relational/clc_any.cl
@@ -1,28 +1,21 @@
 #include <clc/internal/clc.h>
 
 #define _CLC_ANY(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1)
-#define _CLC_ANY2(v) (_CLC_ANY((v).s0) | _CLC_ANY((v).s1))
-#define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2))
-#define _CLC_ANY4(v) (_CLC_ANY3((v)) | _CLC_ANY((v).s3))
-#define _CLC_ANY8(v)                                                           \
-  (_CLC_ANY4((v)) | _CLC_ANY((v).s4) | _CLC_ANY((v).s5) | _CLC_ANY((v).s6) |   \
-   _CLC_ANY((v).s7))
-#define _CLC_ANY16(v)                                                          \
-  (_CLC_ANY8((v)) | _CLC_ANY((v).s8) | _CLC_ANY((v).s9) | _CLC_ANY((v).sA) |   \
-   _CLC_ANY((v).sB) | _CLC_ANY((v).sC) | _CLC_ANY((v).sD) | _CLC_ANY((v).sE) | \
-   _CLC_ANY((v).sf))
 
-#define ANY_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v)
+#define _CLC_ANY_VEC(TYPE)                                                     \
+  _CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v) {                               \
+    return _CLC_ANY(__builtin_reduce_or(v));                                   \
+  }
 
-#define ANY_VECTORIZE(TYPE)                                                    \
-  ANY_ID(TYPE) { return _CLC_ANY(v); }                                         \
-  ANY_ID(TYPE##2) { return _CLC_ANY2(v); }                                     \
-  ANY_ID(TYPE##3) { return _CLC_ANY3(v); }                                     \
-  ANY_ID(TYPE##4) { return _CLC_ANY4(v); }                                     \
-  ANY_ID(TYPE##8) { return _CLC_ANY8(v); }                                     \
-  ANY_ID(TYPE##16) { return _CLC_ANY16(v); }
+#define _CLC_DEFINE_ANY(TYPE)                                                  \
+  _CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v) { return _CLC_ANY(v); }         \
+  _CLC_ANY_VEC(TYPE##2)                                                        \
+  _CLC_ANY_VEC(TYPE##3)                                                        \
+  _CLC_ANY_VEC(TYPE##4)                                                        \
+  _CLC_ANY_VEC(TYPE##8)                                                        \
+  _CLC_ANY_VEC(TYPE##16)
 
-ANY_VECTORIZE(char)
-ANY_VECTORIZE(short)
-ANY_VECTORIZE(int)
-ANY_VECTORIZE(long)
+_CLC_DEFINE_ANY(char)
+_CLC_DEFINE_ANY(short)
+_CLC_DEFINE_ANY(int)
+_CLC_DEFINE_ANY(long)



More information about the cfe-commits mailing list