[libclc] 299a278 - [libclc] Improving vector code generated from scalar code (#140008)

via cfe-commits cfe-commits at lists.llvm.org
Fri May 16 02:20:36 PDT 2025


Author: Wenju He
Date: 2025-05-16T10:20:32+01:00
New Revision: 299a278db16fa0944472af79bfec31dd678c5b37

URL: https://github.com/llvm/llvm-project/commit/299a278db16fa0944472af79bfec31dd678c5b37
DIFF: https://github.com/llvm/llvm-project/commit/299a278db16fa0944472af79bfec31dd678c5b37.diff

LOG: [libclc] Improving vector code generated from scalar code (#140008)

The previous method split each vector into two halves, and shuffle_vector
then concatenated the two results into a vector of the original size. This
PR calls the scalar function on each element directly, which eliminates the
use of shuffle_vector.

Added: 
    

Modified: 
    libclc/clc/include/clc/clcmacro.h
    libclc/clc/lib/generic/math/clc_lgamma_r.cl

Removed: 
    


################################################################################
diff  --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h
index d8772ce38792a..c9f70d2998d37 100644
--- a/libclc/clc/include/clc/clcmacro.h
+++ b/libclc/clc/include/clc/clcmacro.h
@@ -14,100 +14,140 @@
 
 #define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE)          \
   DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) {                              \
-    return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y));                        \
+    return (RET_TYPE##2)(FUNCTION(x.s0), FUNCTION(x.s1));                      \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) {                              \
-    return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z));         \
+    return (RET_TYPE##3)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2));      \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) {                              \
-    return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi));                      \
+    return (RET_TYPE##4)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2),       \
+                         FUNCTION(x.s3));                                      \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) {                              \
-    return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi));                      \
+    return (RET_TYPE##8)(FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2),       \
+                         FUNCTION(x.s3), FUNCTION(x.s4), FUNCTION(x.s5),       \
+                         FUNCTION(x.s6), FUNCTION(x.s7));                      \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) {                            \
-    return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi));                     \
+    return (RET_TYPE##16)(                                                     \
+        FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3),        \
+        FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7),        \
+        FUNCTION(x.s8), FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb),        \
+        FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), FUNCTION(x.sf));       \
   }
 
 #define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,         \
                               ARG2_TYPE)                                       \
   DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) {              \
-    return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y));              \
+    return (RET_TYPE##2)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1));          \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) {              \
-    return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y),               \
-                         FUNCTION(x.z, y.z));                                  \
+    return (RET_TYPE##3)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1),           \
+                         FUNCTION(x.s2, y.s2));                                \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) {              \
-    return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi));          \
+    return (RET_TYPE##4)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1),           \
+                         FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3));          \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) {              \
-    return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi));          \
+    return (RET_TYPE##8)(FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1),           \
+                         FUNCTION(x.s2, y.s2), FUNCTION(x.s3, y.s3),           \
+                         FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5),           \
+                         FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7));          \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) {           \
-    return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi));         \
+    return (RET_TYPE##16)(                                                     \
+        FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), FUNCTION(x.s2, y.s2),      \
+        FUNCTION(x.s3, y.s3), FUNCTION(x.s4, y.s4), FUNCTION(x.s5, y.s5),      \
+        FUNCTION(x.s6, y.s6), FUNCTION(x.s7, y.s7), FUNCTION(x.s8, y.s8),      \
+        FUNCTION(x.s9, y.s9), FUNCTION(x.sa, y.sa), FUNCTION(x.sb, y.sb),      \
+        FUNCTION(x.sc, y.sc), FUNCTION(x.sd, y.sd), FUNCTION(x.se, y.se),      \
+        FUNCTION(x.sf, y.sf));                                                 \
   }
 
 #define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,          \
                              ARG2_TYPE)                                        \
   DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) {                 \
-    return (RET_TYPE##2)(FUNCTION(x, y.lo), FUNCTION(x, y.hi));                \
+    return (RET_TYPE##2)(FUNCTION(x, y.s0), FUNCTION(x, y.s1));                \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE x, ARG2_TYPE##3 y) {                 \
-    return (RET_TYPE##3)(FUNCTION(x, y.x), FUNCTION(x, y.y),                   \
-                         FUNCTION(x, y.z));                                    \
+    return (RET_TYPE##3)(FUNCTION(x, y.s0), FUNCTION(x, y.s1),                 \
+                         FUNCTION(x, y.s2));                                   \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE x, ARG2_TYPE##4 y) {                 \
-    return (RET_TYPE##4)(FUNCTION(x, y.lo), FUNCTION(x, y.hi));                \
+    return (RET_TYPE##4)(FUNCTION(x, y.s0), FUNCTION(x, y.s1),                 \
+                         FUNCTION(x, y.s2), FUNCTION(x, y.s3));                \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE x, ARG2_TYPE##8 y) {                 \
-    return (RET_TYPE##8)(FUNCTION(x, y.lo), FUNCTION(x, y.hi));                \
+    return (RET_TYPE##8)(FUNCTION(x, y.s0), FUNCTION(x, y.s1),                 \
+                         FUNCTION(x, y.s2), FUNCTION(x, y.s3),                 \
+                         FUNCTION(x, y.s4), FUNCTION(x, y.s5),                 \
+                         FUNCTION(x, y.s6), FUNCTION(x, y.s7));                \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE x, ARG2_TYPE##16 y) {               \
-    return (RET_TYPE##16)(FUNCTION(x, y.lo), FUNCTION(x, y.hi));               \
+    return (RET_TYPE##16)(                                                     \
+        FUNCTION(x, y.s0), FUNCTION(x, y.s1), FUNCTION(x, y.s2),               \
+        FUNCTION(x, y.s3), FUNCTION(x, y.s4), FUNCTION(x, y.s5),               \
+        FUNCTION(x, y.s6), FUNCTION(x, y.s7), FUNCTION(x, y.s8),               \
+        FUNCTION(x, y.s9), FUNCTION(x, y.sa), FUNCTION(x, y.sb),               \
+        FUNCTION(x, y.sc), FUNCTION(x, y.sd), FUNCTION(x, y.se),               \
+        FUNCTION(x, y.sf));                                                    \
   }
 
 #define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,        \
                                ARG2_TYPE, ARG3_TYPE)                           \
   DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y,                \
                                 ARG3_TYPE##2 z) {                              \
-    return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y));    \
+    return (RET_TYPE##2)(FUNCTION(x.s0, y.s0, z.s0),                           \
+                         FUNCTION(x.s1, y.s1, z.s1));                          \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y,                \
                                 ARG3_TYPE##3 z) {                              \
-    return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y),     \
-                         FUNCTION(x.z, y.z, z.z));                             \
+    return (RET_TYPE##3)(FUNCTION(x.s0, y.s0, z.s0),                           \
+                         FUNCTION(x.s1, y.s1, z.s1),                           \
+                         FUNCTION(x.s2, y.s2, z.s2));                          \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y,                \
                                 ARG3_TYPE##4 z) {                              \
-    return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo),                           \
-                         FUNCTION(x.hi, y.hi, z.hi));                          \
+    return (RET_TYPE##4)(                                                      \
+        FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1),                \
+        FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3));               \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y,                \
                                 ARG3_TYPE##8 z) {                              \
-    return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo),                           \
-                         FUNCTION(x.hi, y.hi, z.hi));                          \
+    return (RET_TYPE##8)(                                                      \
+        FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1),                \
+        FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3),                \
+        FUNCTION(x.s4, y.s4, z.s4), FUNCTION(x.s5, y.s5, z.s5),                \
+        FUNCTION(x.s6, y.s6, z.s6), FUNCTION(x.s7, y.s7, z.s7));               \
   }                                                                            \
                                                                                \
   DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y,             \
                                  ARG3_TYPE##16 z) {                            \
-    return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo),                          \
-                          FUNCTION(x.hi, y.hi, z.hi));                         \
+    return (RET_TYPE##16)(                                                     \
+        FUNCTION(x.s0, y.s0, z.s0), FUNCTION(x.s1, y.s1, z.s1),                \
+        FUNCTION(x.s2, y.s2, z.s2), FUNCTION(x.s3, y.s3, z.s3),                \
+        FUNCTION(x.s4, y.s4, z.s4), FUNCTION(x.s5, y.s5, z.s5),                \
+        FUNCTION(x.s6, y.s6, z.s6), FUNCTION(x.s7, y.s7, z.s7),                \
+        FUNCTION(x.s8, y.s8, z.s8), FUNCTION(x.s9, y.s9, z.s9),                \
+        FUNCTION(x.sa, y.sa, z.sa), FUNCTION(x.sb, y.sb, z.sb),                \
+        FUNCTION(x.sc, y.sc, z.sc), FUNCTION(x.sd, y.sd, z.sd),                \
+        FUNCTION(x.se, y.se, z.se), FUNCTION(x.sf, y.sf, z.sf));               \
   }
 
 #define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,         \
@@ -115,48 +155,53 @@
   DECLSPEC __CLC_XCONCAT(RET_TYPE, 2)                                          \
       FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x,                                  \
                ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) {                   \
-    return (__CLC_XCONCAT(RET_TYPE, 2))(                                       \
-        FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y),                              \
-        FUNCTION(x.y,                                                          \
-                 (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1)));    \
+    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
+    return (__CLC_XCONCAT(RET_TYPE, 2))(FUNCTION(x.s0, ptr),                   \
+                                        FUNCTION(x.s1, ptr + 1));              \
   }                                                                            \
                                                                                \
   DECLSPEC __CLC_XCONCAT(RET_TYPE, 3)                                          \
       FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x,                                  \
                ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) {                   \
-    return (__CLC_XCONCAT(RET_TYPE, 3))(                                       \
-        FUNCTION(x.x, (ADDR_SPACE ARG2_TYPE *)y),                              \
-        FUNCTION(x.y,                                                          \
-                 (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 1)),     \
-        FUNCTION(x.z,                                                          \
-                 (ADDR_SPACE ARG2_TYPE *)((ADDR_SPACE ARG2_TYPE *)y + 2)));    \
+    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
+    return (__CLC_XCONCAT(RET_TYPE, 3))(FUNCTION(x.s0, ptr),                   \
+                                        FUNCTION(x.s1, ptr + 1),               \
+                                        FUNCTION(x.s2, ptr + 2));              \
   }                                                                            \
                                                                                \
   DECLSPEC __CLC_XCONCAT(RET_TYPE, 4)                                          \
       FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x,                                  \
                ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) {                   \
+    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
     return (__CLC_XCONCAT(RET_TYPE, 4))(                                       \
-        FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) *)y),           \
-        FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT(                              \
-                           ARG2_TYPE, 2) *)((ADDR_SPACE ARG2_TYPE *)y + 2)));  \
+        FUNCTION(x.s0, ptr), FUNCTION(x.s1, ptr + 1), FUNCTION(x.s2, ptr + 2), \
+        FUNCTION(x.s3, ptr + 3));                                              \
   }                                                                            \
                                                                                \
   DECLSPEC __CLC_XCONCAT(RET_TYPE, 8)                                          \
       FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x,                                  \
                ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) {                   \
+    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
     return (__CLC_XCONCAT(RET_TYPE, 8))(                                       \
-        FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) *)y),           \
-        FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT(                              \
-                           ARG2_TYPE, 4) *)((ADDR_SPACE ARG2_TYPE *)y + 4)));  \
+        FUNCTION(x.s0, ptr), FUNCTION(x.s1, ptr + 1), FUNCTION(x.s2, ptr + 2), \
+        FUNCTION(x.s3, ptr + 3), FUNCTION(x.s4, ptr + 4),                      \
+        FUNCTION(x.s5, ptr + 5), FUNCTION(x.s6, ptr + 6),                      \
+        FUNCTION(x.s7, ptr + 7));                                              \
   }                                                                            \
                                                                                \
   DECLSPEC __CLC_XCONCAT(RET_TYPE, 16)                                         \
       FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x,                                 \
                ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) {                  \
+    ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y;                     \
     return (__CLC_XCONCAT(RET_TYPE, 16))(                                      \
-        FUNCTION(x.lo, (ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) *)y),           \
-        FUNCTION(x.hi, (ADDR_SPACE __CLC_XCONCAT(                              \
-                           ARG2_TYPE, 8) *)((ADDR_SPACE ARG2_TYPE *)y + 8)));  \
+        FUNCTION(x.s0, ptr), FUNCTION(x.s1, ptr + 1), FUNCTION(x.s2, ptr + 2), \
+        FUNCTION(x.s3, ptr + 3), FUNCTION(x.s4, ptr + 4),                      \
+        FUNCTION(x.s5, ptr + 5), FUNCTION(x.s6, ptr + 6),                      \
+        FUNCTION(x.s7, ptr + 7), FUNCTION(x.s8, ptr + 8),                      \
+        FUNCTION(x.s9, ptr + 9), FUNCTION(x.sa, ptr + 10),                     \
+        FUNCTION(x.sb, ptr + 11), FUNCTION(x.sc, ptr + 12),                    \
+        FUNCTION(x.sd, ptr + 13), FUNCTION(x.se, ptr + 14),                    \
+        FUNCTION(x.sf, ptr + 15));                                             \
   }
 
 #define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE,     \

diff  --git a/libclc/clc/lib/generic/math/clc_lgamma_r.cl b/libclc/clc/lib/generic/math/clc_lgamma_r.cl
index ad3d63b734eca..96a42bbb6e158 100644
--- a/libclc/clc/lib/generic/math/clc_lgamma_r.cl
+++ b/libclc/clc/lib/generic/math/clc_lgamma_r.cl
@@ -406,13 +406,13 @@ _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_lgamma_r, float,
 #define v4 1.04222645593369134254e-01 /* 0x3FBAAE55, 0xD6537C88 */
 #define v5 3.21709242282423911810e-03 /* 0x3F6A5ABB, 0x57D0CF61 */
 
-#define s0 -7.72156649015328655494e-02 /* 0xBFB3C467, 0xE37DB0C8 */
-#define s1 2.14982415960608852501e-01  /* 0x3FCB848B, 0x36E20878 */
-#define s2 3.25778796408930981787e-01  /* 0x3FD4D98F, 0x4F139F59 */
-#define s3 1.46350472652464452805e-01  /* 0x3FC2BB9C, 0xBEE5F2F7 */
-#define s4 2.66422703033638609560e-02  /* 0x3F9B481C, 0x7E939961 */
-#define s5 1.84028451407337715652e-03  /* 0x3F5E26B6, 0x7368F239 */
-#define s6 3.19475326584100867617e-05  /* 0x3F00BFEC, 0xDD17E945 */
+#define s0_d -7.72156649015328655494e-02 /* 0xBFB3C467, 0xE37DB0C8 */
+#define s1_d 2.14982415960608852501e-01  /* 0x3FCB848B, 0x36E20878 */
+#define s2_d 3.25778796408930981787e-01  /* 0x3FD4D98F, 0x4F139F59 */
+#define s3_d 1.46350472652464452805e-01  /* 0x3FC2BB9C, 0xBEE5F2F7 */
+#define s4_d 2.66422703033638609560e-02  /* 0x3F9B481C, 0x7E939961 */
+#define s5_d 1.84028451407337715652e-03  /* 0x3F5E26B6, 0x7368F239 */
+#define s6_d 3.19475326584100867617e-05  /* 0x3F00BFEC, 0xDD17E945 */
 
 #define r1 1.39200533467621045958e+00 /* 0x3FF645A7, 0x62C4AB74 */
 #define r2 7.21935547567138069525e-01 /* 0x3FE71A18, 0x93D3DCDC */
@@ -530,10 +530,12 @@ _CLC_OVERLOAD _CLC_DEF double __clc_lgamma_r(double x, private int *ip) {
             __clc_fma(
                 y,
                 __clc_fma(
-                    y, __clc_fma(y, __clc_fma(y, __clc_fma(y, s6, s5), s4), s3),
-                    s2),
-                s1),
-            s0);
+                    y,
+                    __clc_fma(y, __clc_fma(y, __clc_fma(y, s6_d, s5_d), s4_d),
+                              s3_d),
+                    s2_d),
+                s1_d),
+            s0_d);
     double q = __clc_fma(
         y,
         __clc_fma(


        


More information about the cfe-commits mailing list