[libclc] Revert "[NFC][libclc] Move _CLC_V_V_VP_VECTORIZE macro into clc_lgamma_r.cl and delete clcmacro.h (#156280)" (PR #157002)

Thu Sep 4 21:05:54 PDT 2025

https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/157002

>From 8390286ffa32ce98ba39cfbe313d9396ce0572fc Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 5 Sep 2025 04:47:56 +0200
Subject: [PATCH 1/2] Revert "[NFC][libclc] Move _CLC_V_V_VP_VECTORIZE macro
 into clc_lgamma_r.cl and delete clcmacro.h (#156280)"

This partially reverts commit d50f2ef437aeb1784f7556fd63639487f245ffaa
because _CLC_V_V_VP_VECTORIZE is still used by downstream code in intel/llvm:
https://github.com/intel/llvm/blob/0433e4d6f5c9/libclc/libspirv/lib/ptx-nvidiacl/math/modf.cl#L30
https://github.com/intel/llvm/blob/0433e4d6f5c9/libclc/libspirv/lib/ptx-nvidiacl/math/sincos.cl#L31
---
 libclc/clc/include/clc/clcmacro.h           | 69 +++++++++++++++++++++
 libclc/clc/lib/generic/math/clc_lgamma_r.cl | 55 +---------------
 2 files changed, 70 insertions(+), 54 deletions(-)
 create mode 100644 libclc/clc/include/clc/clcmacro.h

diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h
new file mode 100644
index 0000000000000..9fa11489b1457
--- /dev/null
+++ b/libclc/clc/include/clc/clcmacro.h
@@ -0,0 +1,69 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_CLCMACRO_H__
+#define __CLC_CLCMACRO_H__
+
+#include <clc/internal/clc.h>
+#include <clc/utils.h>
+
+#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, __CLC_FUNCTION, ARG1_TYPE,   \
+                              ADDR_SPACE, ARG2_TYPE)                           \
+  DECLSPEC __CLC_XCONCAT(RET_TYPE, 2)                                          \
+      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x,                            \
+                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) {             \
+    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
+    return (__CLC_XCONCAT(RET_TYPE, 2))(__CLC_FUNCTION(x.s0, ptr),             \
+                                        __CLC_FUNCTION(x.s1, ptr + 1));        \
+  }                                                                            \
+                                                                               \
+  DECLSPEC __CLC_XCONCAT(RET_TYPE, 3)                                          \
+      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x,                            \
+                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) {             \
+    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
+    return (__CLC_XCONCAT(RET_TYPE, 3))(__CLC_FUNCTION(x.s0, ptr),             \
+                                        __CLC_FUNCTION(x.s1, ptr + 1),         \
+                                        __CLC_FUNCTION(x.s2, ptr + 2));        \
+  }                                                                            \
+                                                                               \
+  DECLSPEC __CLC_XCONCAT(RET_TYPE, 4)                                          \
+      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x,                            \
+                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) {             \
+    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
+    return (__CLC_XCONCAT(RET_TYPE, 4))(                                       \
+        __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1),              \
+        __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3));         \
+  }                                                                            \
+                                                                               \
+  DECLSPEC __CLC_XCONCAT(RET_TYPE, 8)                                          \
+      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x,                            \
+                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) {             \
+    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
+    return (__CLC_XCONCAT(RET_TYPE, 8))(                                       \
+        __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1),              \
+        __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3),          \
+        __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5),          \
+        __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7));         \
+  }                                                                            \
+                                                                               \
+  DECLSPEC __CLC_XCONCAT(RET_TYPE, 16)                                         \
+      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x,                           \
+                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) {            \
+    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
+    return (__CLC_XCONCAT(RET_TYPE, 16))(                                      \
+        __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1),              \
+        __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3),          \
+        __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5),          \
+        __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7),          \
+        __CLC_FUNCTION(x.s8, ptr + 8), __CLC_FUNCTION(x.s9, ptr + 9),          \
+        __CLC_FUNCTION(x.sa, ptr + 10), __CLC_FUNCTION(x.sb, ptr + 11),        \
+        __CLC_FUNCTION(x.sc, ptr + 12), __CLC_FUNCTION(x.sd, ptr + 13),        \
+        __CLC_FUNCTION(x.se, ptr + 14), __CLC_FUNCTION(x.sf, ptr + 15));       \
+  }
+
+#endif // __CLC_CLCMACRO_H__
diff --git a/libclc/clc/lib/generic/math/clc_lgamma_r.cl b/libclc/clc/lib/generic/math/clc_lgamma_r.cl
index 662b34a69bb72..20aa80a63dd37 100644
--- a/libclc/clc/lib/generic/math/clc_lgamma_r.cl
+++ b/libclc/clc/lib/generic/math/clc_lgamma_r.cl
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include <clc/clc_convert.h>
+#include <clc/clcmacro.h>
 #include <clc/float/definitions.h>
 #include <clc/internal/clc.h>
 #include <clc/math/clc_fabs.h>
@@ -16,60 +17,6 @@
 #include <clc/math/clc_sinpi.h>
 #include <clc/math/math.h>
 
-#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, __CLC_FUNCTION, ARG1_TYPE,   \
-                              ADDR_SPACE, ARG2_TYPE)                           \
-  DECLSPEC __CLC_XCONCAT(RET_TYPE, 2)                                          \
-      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x,                            \
-                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) {             \
-    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
-    return (__CLC_XCONCAT(RET_TYPE, 2))(__CLC_FUNCTION(x.s0, ptr),             \
-                                        __CLC_FUNCTION(x.s1, ptr + 1));        \
-  }                                                                            \
-                                                                               \
-  DECLSPEC __CLC_XCONCAT(RET_TYPE, 3)                                          \
-      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x,                            \
-                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) {             \
-    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
-    return (__CLC_XCONCAT(RET_TYPE, 3))(__CLC_FUNCTION(x.s0, ptr),             \
-                                        __CLC_FUNCTION(x.s1, ptr + 1),         \
-                                        __CLC_FUNCTION(x.s2, ptr + 2));        \
-  }                                                                            \
-                                                                               \
-  DECLSPEC __CLC_XCONCAT(RET_TYPE, 4)                                          \
-      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x,                            \
-                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) {             \
-    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
-    return (__CLC_XCONCAT(RET_TYPE, 4))(                                       \
-        __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1),              \
-        __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3));         \
-  }                                                                            \
-                                                                               \
-  DECLSPEC __CLC_XCONCAT(RET_TYPE, 8)                                          \
-      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x,                            \
-                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) {             \
-    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
-    return (__CLC_XCONCAT(RET_TYPE, 8))(                                       \
-        __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1),              \
-        __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3),          \
-        __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5),          \
-        __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7));         \
-  }                                                                            \
-                                                                               \
-  DECLSPEC __CLC_XCONCAT(RET_TYPE, 16)                                         \
-      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x,                           \
-                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) {            \
-    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
-    return (__CLC_XCONCAT(RET_TYPE, 16))(                                      \
-        __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1),              \
-        __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3),          \
-        __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5),          \
-        __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7),          \
-        __CLC_FUNCTION(x.s8, ptr + 8), __CLC_FUNCTION(x.s9, ptr + 9),          \
-        __CLC_FUNCTION(x.sa, ptr + 10), __CLC_FUNCTION(x.sb, ptr + 11),        \
-        __CLC_FUNCTION(x.sc, ptr + 12), __CLC_FUNCTION(x.sd, ptr + 13),        \
-        __CLC_FUNCTION(x.se, ptr + 14), __CLC_FUNCTION(x.sf, ptr + 15));       \
-  }
-
 // ====================================================
 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 //

>From 2eb8acf587d8d97705120b0a126ebf9b01aba1ba Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Fri, 5 Sep 2025 06:05:14 +0200
Subject: [PATCH 2/2] Replace _CLC_V_V_VP_VECTORIZE with
 unary_def_with_ptr_scalarize.inc

---
 libclc/clc/include/clc/clcmacro.h             |  69 ------------
 .../shared/unary_def_with_ptr_scalarize.inc   | 106 ++++++++++++++++++
 libclc/clc/lib/generic/math/clc_lgamma_r.cl   |  27 +++--
 libclc/clc/lib/generic/math/clc_lgamma_r.inc  |   4 +
 4 files changed, 126 insertions(+), 80 deletions(-)
 delete mode 100644 libclc/clc/include/clc/clcmacro.h
 create mode 100644 libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc

diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h
deleted file mode 100644
index 9fa11489b1457..0000000000000
--- a/libclc/clc/include/clc/clcmacro.h
+++ /dev/null
@@ -1,69 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __CLC_CLCMACRO_H__
-#define __CLC_CLCMACRO_H__
-
-#include <clc/internal/clc.h>
-#include <clc/utils.h>
-
-#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, __CLC_FUNCTION, ARG1_TYPE,   \
-                              ADDR_SPACE, ARG2_TYPE)                           \
-  DECLSPEC __CLC_XCONCAT(RET_TYPE, 2)                                          \
-      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x,                            \
-                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) {             \
-    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
-    return (__CLC_XCONCAT(RET_TYPE, 2))(__CLC_FUNCTION(x.s0, ptr),             \
-                                        __CLC_FUNCTION(x.s1, ptr + 1));        \
-  }                                                                            \
-                                                                               \
-  DECLSPEC __CLC_XCONCAT(RET_TYPE, 3)                                          \
-      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x,                            \
-                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) {             \
-    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
-    return (__CLC_XCONCAT(RET_TYPE, 3))(__CLC_FUNCTION(x.s0, ptr),             \
-                                        __CLC_FUNCTION(x.s1, ptr + 1),         \
-                                        __CLC_FUNCTION(x.s2, ptr + 2));        \
-  }                                                                            \
-                                                                               \
-  DECLSPEC __CLC_XCONCAT(RET_TYPE, 4)                                          \
-      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x,                            \
-                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) {             \
-    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
-    return (__CLC_XCONCAT(RET_TYPE, 4))(                                       \
-        __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1),              \
-        __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3));         \
-  }                                                                            \
-                                                                               \
-  DECLSPEC __CLC_XCONCAT(RET_TYPE, 8)                                          \
-      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x,                            \
-                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) {             \
-    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
-    return (__CLC_XCONCAT(RET_TYPE, 8))(                                       \
-        __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1),              \
-        __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3),          \
-        __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5),          \
-        __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7));         \
-  }                                                                            \
-                                                                               \
-  DECLSPEC __CLC_XCONCAT(RET_TYPE, 16)                                         \
-      __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x,                           \
-                     ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) {            \
-    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
-    return (__CLC_XCONCAT(RET_TYPE, 16))(                                      \
-        __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1),              \
-        __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3),          \
-        __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5),          \
-        __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7),          \
-        __CLC_FUNCTION(x.s8, ptr + 8), __CLC_FUNCTION(x.s9, ptr + 9),          \
-        __CLC_FUNCTION(x.sa, ptr + 10), __CLC_FUNCTION(x.sb, ptr + 11),        \
-        __CLC_FUNCTION(x.sc, ptr + 12), __CLC_FUNCTION(x.sd, ptr + 13),        \
-        __CLC_FUNCTION(x.se, ptr + 14), __CLC_FUNCTION(x.sf, ptr + 15));       \
-  }
-
-#endif // __CLC_CLCMACRO_H__
diff --git a/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc b/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc
new file mode 100644
index 0000000000000..fff91d36e626d
--- /dev/null
+++ b/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc
@@ -0,0 +1,106 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/utils.h>
+
+#ifdef __CLC_SCALAR
+
+#ifndef __CLC_IMPL_FUNCTION
+#define __CLC_IMPL_FUNCTION __CLC_FUNCTION
+#endif
+
+#ifndef __CLC_RET_TYPE
+#define __CLC_RET_TYPE __CLC_GENTYPE
+#endif
+
+#ifndef __CLC_ARG1_TYPE
+#define __CLC_ARG1_TYPE __CLC_GENTYPE
+#endif
+
+#ifndef __CLC_ARG2_TYPE
+#define __CLC_ARG2_TYPE __CLC_GENTYPE
+#endif
+
+#define __CLC_RET_VECTYPE __CLC_XCONCAT(__CLC_RET_TYPE, __CLC_VECTOR_SIZE)
+#define __CLC_ARG1_VECTYPE __CLC_XCONCAT(__CLC_ARG1_TYPE, __CLC_VECTOR_SIZE)
+#define __CLC_ARG2_VECTYPE __CLC_XCONCAT(__CLC_ARG2_TYPE, __CLC_VECTOR_SIZE)
+
+#define __CLC_VECTOR_SIZE 2
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+  __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+
+  return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+                             __CLC_IMPL_FUNCTION(x.s1, p + 1));
+}
+#undef __CLC_VECTOR_SIZE
+
+#define __CLC_VECTOR_SIZE 3
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+  __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+  return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+                             __CLC_IMPL_FUNCTION(x.s1, p + 1),
+                             __CLC_IMPL_FUNCTION(x.s2, p + 2));
+}
+#undef __CLC_VECTOR_SIZE
+
+#define __CLC_VECTOR_SIZE 4
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+  __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+  return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+                             __CLC_IMPL_FUNCTION(x.s1, p + 1),
+                             __CLC_IMPL_FUNCTION(x.s2, p + 2),
+                             __CLC_IMPL_FUNCTION(x.s3, p + 3));
+}
+#undef __CLC_VECTOR_SIZE
+
+#define __CLC_VECTOR_SIZE 8
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+  __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+  return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+                             __CLC_IMPL_FUNCTION(x.s1, p + 1),
+                             __CLC_IMPL_FUNCTION(x.s2, p + 2),
+                             __CLC_IMPL_FUNCTION(x.s3, p + 3),
+                             __CLC_IMPL_FUNCTION(x.s4, p + 4),
+                             __CLC_IMPL_FUNCTION(x.s5, p + 5),
+                             __CLC_IMPL_FUNCTION(x.s6, p + 6),
+                             __CLC_IMPL_FUNCTION(x.s7, p + 7));
+}
+#undef __CLC_VECTOR_SIZE
+
+#define __CLC_VECTOR_SIZE 16
+_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
+__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
+  __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
+  return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
+                             __CLC_IMPL_FUNCTION(x.s1, p + 1),
+                             __CLC_IMPL_FUNCTION(x.s2, p + 2),
+                             __CLC_IMPL_FUNCTION(x.s3, p + 3),
+                             __CLC_IMPL_FUNCTION(x.s4, p + 4),
+                             __CLC_IMPL_FUNCTION(x.s5, p + 5),
+                             __CLC_IMPL_FUNCTION(x.s6, p + 6),
+                             __CLC_IMPL_FUNCTION(x.s7, p + 7),
+                             __CLC_IMPL_FUNCTION(x.s8, p + 8),
+                             __CLC_IMPL_FUNCTION(x.s9, p + 9),
+                             __CLC_IMPL_FUNCTION(x.sa, p + 10),
+                             __CLC_IMPL_FUNCTION(x.sb, p + 11),
+                             __CLC_IMPL_FUNCTION(x.sc, p + 12),
+                             __CLC_IMPL_FUNCTION(x.sd, p + 13),
+                             __CLC_IMPL_FUNCTION(x.se, p + 14),
+                             __CLC_IMPL_FUNCTION(x.sf, p + 15));
+}
+#undef __CLC_VECTOR_SIZE
+
+#undef __CLC_RET_VECTYPE
+#undef __CLC_ARG1_VECTYPE
+#undef __CLC_ARG2_VECTYPE
+
+#endif // __CLC_SCALAR
diff --git a/libclc/clc/lib/generic/math/clc_lgamma_r.cl b/libclc/clc/lib/generic/math/clc_lgamma_r.cl
index 20aa80a63dd37..5c9f673eef489 100644
--- a/libclc/clc/lib/generic/math/clc_lgamma_r.cl
+++ b/libclc/clc/lib/generic/math/clc_lgamma_r.cl
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include <clc/clc_convert.h>
-#include <clc/clcmacro.h>
 #include <clc/float/definitions.h>
 #include <clc/internal/clc.h>
 #include <clc/math/clc_fabs.h>
@@ -280,9 +279,6 @@ _CLC_OVERLOAD _CLC_DEF float __clc_lgamma_r(float x, private int *signp) {
   return r;
 }
 
-_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_lgamma_r, float,
-                      private, int)
-
 #ifdef cl_khr_fp64
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 // ====================================================
@@ -586,9 +582,7 @@ _CLC_OVERLOAD _CLC_DEF double __clc_lgamma_r(double x, private int *ip) {
   return r;
 }
 
-_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_lgamma_r, double,
-                      private, int)
-#endif
+#endif // cl_khr_fp64
 
 #ifdef cl_khr_fp16
 
@@ -598,24 +592,35 @@ _CLC_OVERLOAD _CLC_DEF half __clc_lgamma_r(half x, private int *iptr) {
   return (half)__clc_lgamma_r((float)x, iptr);
 }
 
-_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_lgamma_r, half,
-                      private, int);
+#endif // cl_khr_fp16
+
+#define __CLC_FUNCTION __clc_lgamma_r
+#define __CLC_ARG2_TYPE int
 
-#endif
+#define __CLC_ADDRSPACE private
+#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_ADDRSPACE
 
 #define __CLC_ADDRSPACE global
 #define __CLC_BODY <clc_lgamma_r.inc>
 #include <clc/math/gentype.inc>
+#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc>
+#include <clc/math/gentype.inc>
 #undef __CLC_ADDRSPACE
 
 #define __CLC_ADDRSPACE local
 #define __CLC_BODY <clc_lgamma_r.inc>
 #include <clc/math/gentype.inc>
+#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc>
+#include <clc/math/gentype.inc>
 #undef __CLC_ADDRSPACE
 
 #if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
 #define __CLC_ADDRSPACE generic
 #define __CLC_BODY <clc_lgamma_r.inc>
 #include <clc/math/gentype.inc>
+#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc>
+#include <clc/math/gentype.inc>
 #undef __CLC_ADDRSPACE
-#endif
+#endif // _CLC_DISTINCT_GENERIC_AS_SUPPORTED
diff --git a/libclc/clc/lib/generic/math/clc_lgamma_r.inc b/libclc/clc/lib/generic/math/clc_lgamma_r.inc
index 87891efd44755..931fa089ff3d1 100644
--- a/libclc/clc/lib/generic/math/clc_lgamma_r.inc
+++ b/libclc/clc/lib/generic/math/clc_lgamma_r.inc
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifdef __CLC_SCALAR
+
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
 __clc_lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) {
   __CLC_INTN private_iptr;
@@ -13,3 +15,5 @@ __clc_lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) {
   *iptr = private_iptr;
   return ret;
 }
+
+#endif // __CLC_SCALAR