r210449 - [PPC64LE] Implement little-endian semantics for vec_sums
Bill Schmidt
wschmidt at linux.vnet.ibm.com
Sun Jun 8 20:31:48 PDT 2014
Author: wschmidt
Date: Sun Jun 8 22:31:47 2014
New Revision: 210449
URL: http://llvm.org/viewvc/llvm-project?rev=210449&view=rev
Log:
[PPC64LE] Implement little-endian semantics for vec_sums
The PowerPC vsumsws instruction, accessed via vec_sums, is defined
architecturally with a big-endian bias, in that the second input vector
and the result always reference big-endian element 3 (little-endian
element 0). For ease of porting, the programmer wants element 3 in
both cases.
To provide these semantics, for little endian we generate a permute for
the second input vector prior to the vsumsws instruction, and generate
a permute for the result vector following the vsumsws instruction.
The correctness of this code is tested by the new sums.c test added in
a previous patch, as well as the modifications to
builtins-ppc-altivec.c in the present patch.
Modified:
cfe/trunk/lib/Headers/altivec.h
cfe/trunk/test/CodeGen/builtins-ppc-altivec.c
Modified: cfe/trunk/lib/Headers/altivec.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/altivec.h?rev=210449&r1=210448&r2=210449&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/altivec.h (original)
+++ cfe/trunk/lib/Headers/altivec.h Sun Jun 8 22:31:47 2014
@@ -8398,10 +8398,26 @@ vec_vsum2sws(vector int __a, vector int
/* vec_sums */
+/* The vsumsws instruction has a big-endian bias, so that the second
+ input vector and the result always reference big-endian element 3
+ (little-endian element 0). For ease of porting the programmer
+ wants element 3 in both cases, so for little endian we must perform
+ some permutes. */
+
static vector signed int __attribute__((__always_inline__))
vec_sums(vector signed int __a, vector signed int __b)
{
+#ifdef __LITTLE_ENDIAN__
+ __b = (vector signed int)
+ vec_perm(__b, __b, (vector unsigned char)
+ (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+ __b = __builtin_altivec_vsumsws(__a, __b);
+ return (vector signed int)
+ vec_perm(__b, __b, (vector unsigned char)
+ (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+#else
return __builtin_altivec_vsumsws(__a, __b);
+#endif
}
/* vec_vsumsws */
@@ -8409,7 +8425,17 @@ vec_sums(vector signed int __a, vector s
static vector signed int __attribute__((__always_inline__))
vec_vsumsws(vector signed int __a, vector signed int __b)
{
+#ifdef __LITTLE_ENDIAN__
+ __b = (vector signed int)
+ vec_perm(__b, __b, (vector unsigned char)
+ (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+ __b = __builtin_altivec_vsumsws(__a, __b);
+ return (vector signed int)
+ vec_perm(__b, __b, (vector unsigned char)
+ (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+#else
return __builtin_altivec_vsumsws(__a, __b);
+#endif
}
/* vec_trunc */
Modified: cfe/trunk/test/CodeGen/builtins-ppc-altivec.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-ppc-altivec.c?rev=210449&r1=210448&r2=210449&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/builtins-ppc-altivec.c (original)
+++ cfe/trunk/test/CodeGen/builtins-ppc-altivec.c Sun Jun 8 22:31:47 2014
@@ -5155,11 +5155,15 @@ void test6() {
/* vec_sums */
res_vi = vec_sums(vi, vi);
// CHECK: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
res_vi = vec_vsumsws(vi, vi);
// CHECK: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
/* vec_trunc */
res_vf = vec_trunc(vf);
More information about the cfe-commits
mailing list