r210449 - [PPC64LE] Implement little-endian semantics for vec_sums

Sun Jun 8 20:31:48 PDT 2014

Author: wschmidt
Date: Sun Jun  8 22:31:47 2014
New Revision: 210449

URL: http://llvm.org/viewvc/llvm-project?rev=210449&view=rev
Log:
[PPC64LE] Implement little-endian semantics for vec_sums

The PowerPC vsumsws instruction, accessed via vec_sums, is defined
architecturally with a big-endian bias, in that the second input vector
and the result always reference big-endian element 3 (little-endian
element 0).  For ease of porting, the programmer wants element 3 in
both cases.

To provide these semantics, for little endian we generate a permute for
the second input vector prior to the vsumsws instruction, and generate
a permute for the result vector following the vsumsws instruction.

The correctness of this code is tested by the new sums.c test added in
a previous patch, as well as the modifications to
builtins-ppc-altivec.c in the present patch.

Modified:
    cfe/trunk/lib/Headers/altivec.h
    cfe/trunk/test/CodeGen/builtins-ppc-altivec.c

Modified: cfe/trunk/lib/Headers/altivec.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/altivec.h?rev=210449&r1=210448&r2=210449&view=diff
==============================================================================

--- cfe/trunk/lib/Headers/altivec.h (original)
+++ cfe/trunk/lib/Headers/altivec.h Sun Jun  8 22:31:47 2014
@@ -8398,10 +8398,26 @@ vec_vsum2sws(vector int __a, vector int
 
 /* vec_sums */
 
+/* The vsumsws instruction has a big-endian bias, so that the second
+   input vector and the result always reference big-endian element 3
+   (little-endian element 0).  For ease of porting the programmer
+   wants element 3 in both cases, so for little endian we must perform
+   some permutes.  */
+
 static vector signed int __attribute__((__always_inline__))
 vec_sums(vector signed int __a, vector signed int __b)
 {
+#ifdef __LITTLE_ENDIAN__
+  __b = (vector signed int)
+    vec_perm(__b, __b, (vector unsigned char)
+	     (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+  __b = __builtin_altivec_vsumsws(__a, __b);
+  return (vector signed int)
+    vec_perm(__b, __b, (vector unsigned char)
+	     (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+#else
   return __builtin_altivec_vsumsws(__a, __b);
+#endif
 }
 
 /* vec_vsumsws */
@@ -8409,7 +8425,17 @@ vec_sums(vector signed int __a, vector s
 static vector signed int __attribute__((__always_inline__))
 vec_vsumsws(vector signed int __a, vector signed int __b)
 {
+#ifdef __LITTLE_ENDIAN__
+  __b = (vector signed int)
+    vec_perm(__b, __b, (vector unsigned char)
+	     (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+  __b = __builtin_altivec_vsumsws(__a, __b);
+  return (vector signed int)
+    vec_perm(__b, __b, (vector unsigned char)
+	     (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+#else
   return __builtin_altivec_vsumsws(__a, __b);
+#endif
 }
 
 /* vec_trunc */

Modified: cfe/trunk/test/CodeGen/builtins-ppc-altivec.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-ppc-altivec.c?rev=210449&r1=210448&r2=210449&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/builtins-ppc-altivec.c (original)
+++ cfe/trunk/test/CodeGen/builtins-ppc-altivec.c Sun Jun  8 22:31:47 2014
@@ -5155,11 +5155,15 @@ void test6() {
   /* vec_sums */
   res_vi = vec_sums(vi, vi);
 // CHECK: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
 // CHECK-LE: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vi = vec_vsumsws(vi, vi);
 // CHECK: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
 // CHECK-LE: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
 
   /* vec_trunc */
   res_vf = vec_trunc(vf);