[libclc] Revert "[NFC][libclc] Move _CLC_V_V_VP_VECTORIZE macro into clc_lgamma_r.cl and delete clcmacro.h (#156280)" (PR #157002)
Wenju He via cfe-commits
cfe-commits at lists.llvm.org
Thu Sep 4 21:05:54 PDT 2025
https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/157002
>From 8390286ffa32ce98ba39cfbe313d9396ce0572fc Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 5 Sep 2025 04:47:56 +0200
Subject: [PATCH 1/2] Revert "[NFC][libclc] Move _CLC_V_V_VP_VECTORIZE macro
into clc_lgamma_r.cl and delete clcmacro.h (#156280)"
This partially reverts commit d50f2ef437aeb1784f7556fd63639487f245ffaa
because _CLC_V_V_VP_VECTORIZE is also used in our downstream code:
https://github.com/intel/llvm/blob/0433e4d6f5c9/libclc/libspirv/lib/ptx-nvidiacl/math/modf.cl#L30
https://github.com/intel/llvm/blob/0433e4d6f5c9/libclc/libspirv/lib/ptx-nvidiacl/math/sincos.cl#L31
---
libclc/clc/include/clc/clcmacro.h | 69 +++++++++++++++++++++
libclc/clc/lib/generic/math/clc_lgamma_r.cl | 55 +---------------
2 files changed, 70 insertions(+), 54 deletions(-)
create mode 100644 libclc/clc/include/clc/clcmacro.h
diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h
new file mode 100644
index 0000000000000..9fa11489b1457
--- /dev/null
+++ b/libclc/clc/include/clc/clcmacro.h
@@ -0,0 +1,69 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_CLCMACRO_H__
+#define __CLC_CLCMACRO_H__
+
+#include <clc/internal/clc.h>
+#include <clc/utils.h>
+
+#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, __CLC_FUNCTION, ARG1_TYPE, \
+ ADDR_SPACE, ARG2_TYPE) \
+ DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \
+ __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \
+ ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \
+ ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
+ return (__CLC_XCONCAT(RET_TYPE, 2))(__CLC_FUNCTION(x.s0, ptr), \
+ __CLC_FUNCTION(x.s1, ptr + 1)); \
+ } \
+ \
+ DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \
+ __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \
+ ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \
+ ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
+ return (__CLC_XCONCAT(RET_TYPE, 3))(__CLC_FUNCTION(x.s0, ptr), \
+ __CLC_FUNCTION(x.s1, ptr + 1), \
+ __CLC_FUNCTION(x.s2, ptr + 2)); \
+ } \
+ \
+ DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \
+ __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \
+ ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \
+ ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
+ return (__CLC_XCONCAT(RET_TYPE, 4))( \
+ __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
+ __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3)); \
+ } \
+ \
+ DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \
+ __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \
+ ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \
+ ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
+ return (__CLC_XCONCAT(RET_TYPE, 8))( \
+ __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
+ __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \
+ __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \
+ __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7)); \
+ } \
+ \
+ DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \
+ __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \
+ ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \
+ ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
+ return (__CLC_XCONCAT(RET_TYPE, 16))( \
+ __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
+ __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \
+ __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \
+ __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7), \
+ __CLC_FUNCTION(x.s8, ptr + 8), __CLC_FUNCTION(x.s9, ptr + 9), \
+ __CLC_FUNCTION(x.sa, ptr + 10), __CLC_FUNCTION(x.sb, ptr + 11), \
+ __CLC_FUNCTION(x.sc, ptr + 12), __CLC_FUNCTION(x.sd, ptr + 13), \
+ __CLC_FUNCTION(x.se, ptr + 14), __CLC_FUNCTION(x.sf, ptr + 15)); \
+ }
+
+#endif // __CLC_CLCMACRO_H__
diff --git a/libclc/clc/lib/generic/math/clc_lgamma_r.cl b/libclc/clc/lib/generic/math/clc_lgamma_r.cl
index 662b34a69bb72..20aa80a63dd37 100644
--- a/libclc/clc/lib/generic/math/clc_lgamma_r.cl
+++ b/libclc/clc/lib/generic/math/clc_lgamma_r.cl
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
+#include <clc/clcmacro.h>
#include <clc/float/definitions.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_fabs.h>
@@ -16,60 +17,6 @@
#include <clc/math/clc_sinpi.h>
#include <clc/math/math.h>
-#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, __CLC_FUNCTION, ARG1_TYPE, \
- ADDR_SPACE, ARG2_TYPE) \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 2))(__CLC_FUNCTION(x.s0, ptr), \
- __CLC_FUNCTION(x.s1, ptr + 1)); \
- } \
- \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 3))(__CLC_FUNCTION(x.s0, ptr), \
- __CLC_FUNCTION(x.s1, ptr + 1), \
- __CLC_FUNCTION(x.s2, ptr + 2)); \
- } \
- \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 4))( \
- __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
- __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3)); \
- } \
- \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 8))( \
- __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
- __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \
- __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \
- __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7)); \
- } \
- \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 16))( \
- __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
- __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \
- __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \
- __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7), \
- __CLC_FUNCTION(x.s8, ptr + 8), __CLC_FUNCTION(x.s9, ptr + 9), \
- __CLC_FUNCTION(x.sa, ptr + 10), __CLC_FUNCTION(x.sb, ptr + 11), \
- __CLC_FUNCTION(x.sc, ptr + 12), __CLC_FUNCTION(x.sd, ptr + 13), \
- __CLC_FUNCTION(x.se, ptr + 14), __CLC_FUNCTION(x.sf, ptr + 15)); \
- }
-
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
>From 2eb8acf587d8d97705120b0a126ebf9b01aba1ba Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 5 Sep 2025 06:05:14 +0200
Subject: [PATCH 2/2] replace _CLC_V_V_VP_VECTORIZE with use of
unary_def_with_ptr_scalarize.inc
---
libclc/clc/include/clc/clcmacro.h | 69 ------------
.../shared/unary_def_with_ptr_scalarize.inc | 106 ++++++++++++++++++
libclc/clc/lib/generic/math/clc_lgamma_r.cl | 27 +++--
libclc/clc/lib/generic/math/clc_lgamma_r.inc | 4 +
4 files changed, 126 insertions(+), 80 deletions(-)
delete mode 100644 libclc/clc/include/clc/clcmacro.h
create mode 100644 libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc
diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h
deleted file mode 100644
index 9fa11489b1457..0000000000000
--- a/libclc/clc/include/clc/clcmacro.h
+++ /dev/null
@@ -1,69 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __CLC_CLCMACRO_H__
-#define __CLC_CLCMACRO_H__
-
-#include <clc/internal/clc.h>
-#include <clc/utils.h>
-
-#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, __CLC_FUNCTION, ARG1_TYPE, \
- ADDR_SPACE, ARG2_TYPE) \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 2))(__CLC_FUNCTION(x.s0, ptr), \
- __CLC_FUNCTION(x.s1, ptr + 1)); \
- } \
- \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 3))(__CLC_FUNCTION(x.s0, ptr), \
- __CLC_FUNCTION(x.s1, ptr + 1), \
- __CLC_FUNCTION(x.s2, ptr + 2)); \
- } \
- \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 4))( \
- __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
- __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3)); \
- } \
- \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 8))( \
- __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
- __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \
- __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \
- __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7)); \
- } \
- \
- DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \
- __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \
- ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \
- ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
- return (__CLC_XCONCAT(RET_TYPE, 16))( \
- __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
- __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \
- __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \
- __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7), \
- __CLC_FUNCTION(x.s8, ptr + 8), __CLC_FUNCTION(x.s9, ptr + 9), \
- __CLC_FUNCTION(x.sa, ptr + 10), __CLC_FUNCTION(x.sb, ptr + 11), \
- __CLC_FUNCTION(x.sc, ptr + 12), __CLC_FUNCTION(x.sd, ptr + 13), \
- __CLC_FUNCTION(x.se, ptr + 14), __CLC_FUNCTION(x.sf, ptr + 15)); \
- }
-
-#endif // __CLC_CLCMACRO_H__
diff --git a/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc b/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc
new file mode 100644
index 0000000000000..fff91d36e626d
--- /dev/null
+++ b/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc
@@ -0,0 +1,106 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/utils.h>
+
+#ifdef __CLC_SCALAR
+
+#ifndef __CLC_IMPL_FUNCTION
+#define __CLC_IMPL_FUNCTION __CLC_FUNCTION
+#endif
+
+#ifndef __CLC_RET_TYPE
+#define __CLC_RET_TYPE __CLC_GENTYPE
+#endif
+
+#ifndef __CLC_ARG1_TYPE
+#define __CLC_ARG1_TYPE __CLC_GENTYPE
+#endif
+
+#ifndef __CLC_ARG2_TYPE
+#define __CLC_ARG2_TYPE __CLC_GENTYPE
+#endif
+
+#define __CLC_RET_VECTYPE __CLC_XCONCAT(__CLC_RET_TYPE, __CLC_VECTOR_SIZE)
+#define __CLC_ARG1_VECTYPE __CLC_XCONCAT(__CLC_ARG1_TYPE, __CLC_VECTOR_SIZE)
+#define __CLC_ARG2_VECTYPE __CLC_XCONCAT(__CLC_ARG2_TYPE, __CLC_VECTOR_SIZE)
+
+#define __CLC_VECTOR_SIZE 2
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+ __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+
+ return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+ __CLC_IMPL_FUNCTION(x.s1, p + 1));
+}
+#undef __CLC_VECTOR_SIZE
+
+#define __CLC_VECTOR_SIZE 3
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+ __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+ return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+ __CLC_IMPL_FUNCTION(x.s1, p + 1),
+ __CLC_IMPL_FUNCTION(x.s2, p + 2));
+}
+#undef __CLC_VECTOR_SIZE
+
+#define __CLC_VECTOR_SIZE 4
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+ __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+ return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+ __CLC_IMPL_FUNCTION(x.s1, p + 1),
+ __CLC_IMPL_FUNCTION(x.s2, p + 2),
+ __CLC_IMPL_FUNCTION(x.s3, p + 3));
+}
+#undef __CLC_VECTOR_SIZE
+
+#define __CLC_VECTOR_SIZE 8
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+ __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+ return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+ __CLC_IMPL_FUNCTION(x.s1, p + 1),
+ __CLC_IMPL_FUNCTION(x.s2, p + 2),
+ __CLC_IMPL_FUNCTION(x.s3, p + 3),
+ __CLC_IMPL_FUNCTION(x.s4, p + 4),
+ __CLC_IMPL_FUNCTION(x.s5, p + 5),
+ __CLC_IMPL_FUNCTION(x.s6, p + 6),
+ __CLC_IMPL_FUNCTION(x.s7, p + 7));
+}
+#undef __CLC_VECTOR_SIZE
+
+#define __CLC_VECTOR_SIZE 16
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+ __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+ return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+ __CLC_IMPL_FUNCTION(x.s1, p + 1),
+ __CLC_IMPL_FUNCTION(x.s2, p + 2),
+ __CLC_IMPL_FUNCTION(x.s3, p + 3),
+ __CLC_IMPL_FUNCTION(x.s4, p + 4),
+ __CLC_IMPL_FUNCTION(x.s5, p + 5),
+ __CLC_IMPL_FUNCTION(x.s6, p + 6),
+ __CLC_IMPL_FUNCTION(x.s7, p + 7),
+ __CLC_IMPL_FUNCTION(x.s8, p + 8),
+ __CLC_IMPL_FUNCTION(x.s9, p + 9),
+ __CLC_IMPL_FUNCTION(x.sa, p + 10),
+ __CLC_IMPL_FUNCTION(x.sb, p + 11),
+ __CLC_IMPL_FUNCTION(x.sc, p + 12),
+ __CLC_IMPL_FUNCTION(x.sd, p + 13),
+ __CLC_IMPL_FUNCTION(x.se, p + 14),
+ __CLC_IMPL_FUNCTION(x.sf, p + 15));
+}
+#undef __CLC_VECTOR_SIZE
+
+#undef __CLC_RET_VECTYPE
+#undef __CLC_ARG1_VECTYPE
+#undef __CLC_ARG2_VECTYPE
+
+#endif // __CLC_SCALAR
diff --git a/libclc/clc/lib/generic/math/clc_lgamma_r.cl b/libclc/clc/lib/generic/math/clc_lgamma_r.cl
index 20aa80a63dd37..5c9f673eef489 100644
--- a/libclc/clc/lib/generic/math/clc_lgamma_r.cl
+++ b/libclc/clc/lib/generic/math/clc_lgamma_r.cl
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
#include <clc/float/definitions.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_fabs.h>
@@ -280,9 +279,6 @@ _CLC_OVERLOAD _CLC_DEF float __clc_lgamma_r(float x, private int *signp) {
return r;
}
-_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_lgamma_r, float,
- private, int)
-
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// ====================================================
@@ -586,9 +582,7 @@ _CLC_OVERLOAD _CLC_DEF double __clc_lgamma_r(double x, private int *ip) {
return r;
}
-_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_lgamma_r, double,
- private, int)
-#endif
+#endif // cl_khr_fp64
#ifdef cl_khr_fp16
@@ -598,24 +592,35 @@ _CLC_OVERLOAD _CLC_DEF half __clc_lgamma_r(half x, private int *iptr) {
return (half)__clc_lgamma_r((float)x, iptr);
}
-_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_lgamma_r, half,
- private, int);
+#endif // cl_khr_fp16
+
+#define __CLC_FUNCTION __clc_lgamma_r
+#define __CLC_ARG2_TYPE int
-#endif
+#define __CLC_ADDRSPACE private
+#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_ADDRSPACE
#define __CLC_ADDRSPACE global
#define __CLC_BODY <clc_lgamma_r.inc>
#include <clc/math/gentype.inc>
+#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc>
+#include <clc/math/gentype.inc>
#undef __CLC_ADDRSPACE
#define __CLC_ADDRSPACE local
#define __CLC_BODY <clc_lgamma_r.inc>
#include <clc/math/gentype.inc>
+#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc>
+#include <clc/math/gentype.inc>
#undef __CLC_ADDRSPACE
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
#define __CLC_ADDRSPACE generic
#define __CLC_BODY <clc_lgamma_r.inc>
#include <clc/math/gentype.inc>
+#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc>
+#include <clc/math/gentype.inc>
#undef __CLC_ADDRSPACE
-#endif
+#endif // _CLC_DISTINCT_GENERIC_AS_SUPPORTED
diff --git a/libclc/clc/lib/generic/math/clc_lgamma_r.inc b/libclc/clc/lib/generic/math/clc_lgamma_r.inc
index 87891efd44755..931fa089ff3d1 100644
--- a/libclc/clc/lib/generic/math/clc_lgamma_r.inc
+++ b/libclc/clc/lib/generic/math/clc_lgamma_r.inc
@@ -6,6 +6,8 @@
//
//===----------------------------------------------------------------------===//
+#ifdef __CLC_SCALAR
+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
__clc_lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) {
__CLC_INTN private_iptr;
@@ -13,3 +15,5 @@ __clc_lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) {
*iptr = private_iptr;
return ret;
}
+
+#endif // __CLC_SCALAR
More information about the cfe-commits mailing list