[Libclc-dev] [PATCH 1/9] RFC: Refactor clcmacro.h to vectorize without hi/lo

Aaron Watry awatry at gmail.com
Tue Jul 22 18:46:42 PDT 2014


There are odd things happening with 16x vectors in nextafter() and sign().

When I changed float16 load/store to use the assembly path in later
patches instead of the macro-vectorized version, nextafter and sign
stopped working (next 2 commits/patches).

As near as I can tell, we're getting correct results for
elements 0-7, but then elements 8+ are wrong. The final result
seems to be composed of the first 8 elements of the computed
result, and then elements 16-23, which are likely uninitialized.

I'd like to say the issue is in clang, but I have nothing to back
that up at the moment, and given that this patch fixes the issue, I’m
not sure how much time I want to spend investigating right now.

Explicitly splitting all of the vectorize macros the way that this patch
does gets everything working again, but I have a feeling that we're
papering over a bug somewhere, hence the RFC subject.

I’m not sure what’s going on here, and it’s only the nextafter/sign
functions that regressed. This patch fixes the test results in piglit.

No significant change in the number of instructions in the bitcode
for nextafter float16 (a saving of 2-3 instructions over ~350 lines).

Signed-off-by: Aaron Watry <awatry at gmail.com>
---
 generic/lib/clcmacro.h | 84 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 73 insertions(+), 11 deletions(-)

diff --git a/generic/lib/clcmacro.h b/generic/lib/clcmacro.h
index 730073a..64b1770 100644
--- a/generic/lib/clcmacro.h
+++ b/generic/lib/clcmacro.h
@@ -1,44 +1,106 @@
 #define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
   DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
-    return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
+    return (RET_TYPE##2){FUNCTION(x.s0), FUNCTION(x.s1)}; \
   } \
 \
   DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
-    return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \
+    return (RET_TYPE##3){FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2)}; \
   } \
 \
   DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \
-    return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+    return (RET_TYPE##4){ \
+      FUNCTION(x.s0), \
+      FUNCTION(x.s1), \
+      FUNCTION(x.s2), \
+      FUNCTION(x.s3), \
+    }; \
   } \
 \
   DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \
-    return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+    return (RET_TYPE##8){ \
+      FUNCTION(x.s0), \
+      FUNCTION(x.s1), \
+      FUNCTION(x.s2), \
+      FUNCTION(x.s3), \
+      FUNCTION(x.s4), \
+      FUNCTION(x.s5), \
+      FUNCTION(x.s6), \
+      FUNCTION(x.s7), \
+    }; \
   } \
 \
   DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \
-    return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \
+    return (RET_TYPE##16){ \
+      FUNCTION(x.s0), \
+      FUNCTION(x.s1), \
+      FUNCTION(x.s2), \
+      FUNCTION(x.s3), \
+      FUNCTION(x.s4), \
+      FUNCTION(x.s5), \
+      FUNCTION(x.s6), \
+      FUNCTION(x.s7), \
+      FUNCTION(x.s8), \
+      FUNCTION(x.s9), \
+      FUNCTION(x.sa), \
+      FUNCTION(x.sb), \
+      FUNCTION(x.sc), \
+      FUNCTION(x.sd), \
+      FUNCTION(x.se), \
+      FUNCTION(x.sf) \
+    }; \
   }
 
 #define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
   DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
-    return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \
+    return (RET_TYPE##2){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1)}; \
   } \
 \
   DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \
-    return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \
-                         FUNCTION(x.z, y.z)); \
+    return (RET_TYPE##3){FUNCTION(x.s0, y.s0), FUNCTION(x.s1, y.s1), \
+                         FUNCTION(x.s2, y.s2)}; \
   } \
 \
   DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \
-    return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+    return (RET_TYPE##4){ \
+      FUNCTION(x.s0, y.s0), \
+      FUNCTION(x.s1, y.s1), \
+      FUNCTION(x.s2, y.s2), \
+      FUNCTION(x.s3, y.s3), \
+    }; \
   } \
 \
   DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \
-    return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+    return (RET_TYPE##8){ \
+      FUNCTION(x.s0, y.s0), \
+      FUNCTION(x.s1, y.s1), \
+      FUNCTION(x.s2, y.s2), \
+      FUNCTION(x.s3, y.s3), \
+      FUNCTION(x.s4, y.s4), \
+      FUNCTION(x.s5, y.s5), \
+      FUNCTION(x.s6, y.s6), \
+      FUNCTION(x.s7, y.s7), \
+    }; \
   } \
 \
   DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \
-    return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
+    return (RET_TYPE##16){ \
+      FUNCTION(x.s0, y.s0), \
+      FUNCTION(x.s1, y.s1), \
+      FUNCTION(x.s2, y.s2), \
+      FUNCTION(x.s3, y.s3), \
+      FUNCTION(x.s4, y.s4), \
+      FUNCTION(x.s5, y.s5), \
+      FUNCTION(x.s6, y.s6), \
+      FUNCTION(x.s7, y.s7), \
+      FUNCTION(x.s8, y.s8), \
+      FUNCTION(x.s9, y.s9), \
+      FUNCTION(x.sa, y.sa), \
+      FUNCTION(x.sb, y.sb), \
+      FUNCTION(x.sc, y.sc), \
+      FUNCTION(x.sd, y.sd), \
+      FUNCTION(x.se, y.se), \
+      FUNCTION(x.sf, y.sf) \
+    }; \
   }
 
 #define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
-- 
1.9.1





More information about the Libclc-dev mailing list