r210657 - [PPC64LE] Fix vec_sld and vec_vsldoi for little endian
Bill Schmidt
wschmidt at linux.vnet.ibm.com
Wed Jun 11 08:48:46 PDT 2014
Author: wschmidt
Date: Wed Jun 11 10:48:46 2014
New Revision: 210657
URL: http://llvm.org/viewvc/llvm-project?rev=210657&view=rev
Log:
[PPC64LE] Fix vec_sld and vec_vsldoi for little endian
The vec_sld and vec_vsldoi interfaces perform a left-shift on vector
arguments for both big and little endian. However, because they rely
on the vec_perm interface which is endian-dependent, the permutation
vector needs to be reversed for LE to get the proper shift direction.
I've added extra tests for these interfaces for LE in
builtins-ppc-altivec.c.
Modified:
cfe/trunk/lib/Headers/altivec.h
cfe/trunk/test/CodeGen/builtins-ppc-altivec.c
Modified: cfe/trunk/lib/Headers/altivec.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/altivec.h?rev=210657&r1=210656&r2=210657&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/altivec.h (original)
+++ cfe/trunk/lib/Headers/altivec.h Wed Jun 11 10:48:46 2014
@@ -5224,65 +5224,113 @@ vec_vslw(vector unsigned int __a, vector
static vector signed char __ATTRS_o_ai
vec_sld(vector signed char __a, vector signed char __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector unsigned char __ATTRS_o_ai
vec_sld(vector unsigned char __a, vector unsigned char __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector short __ATTRS_o_ai
vec_sld(vector short __a, vector short __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector unsigned short __ATTRS_o_ai
vec_sld(vector unsigned short __a, vector unsigned short __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector pixel __ATTRS_o_ai
vec_sld(vector pixel __a, vector pixel __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector int __ATTRS_o_ai
vec_sld(vector int __a, vector int __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector unsigned int __ATTRS_o_ai
vec_sld(vector unsigned int __a, vector unsigned int __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector float __ATTRS_o_ai
vec_sld(vector float __a, vector float __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
/* vec_vsldoi */
@@ -5290,65 +5338,113 @@ vec_sld(vector float __a, vector float _
static vector signed char __ATTRS_o_ai
vec_vsldoi(vector signed char __a, vector signed char __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector unsigned char __ATTRS_o_ai
vec_vsldoi(vector unsigned char __a, vector unsigned char __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector short __ATTRS_o_ai
vec_vsldoi(vector short __a, vector short __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector unsigned short __ATTRS_o_ai
vec_vsldoi(vector unsigned short __a, vector unsigned short __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector pixel __ATTRS_o_ai
vec_vsldoi(vector pixel __a, vector pixel __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector int __ATTRS_o_ai
vec_vsldoi(vector int __a, vector int __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector unsigned int __ATTRS_o_ai
vec_vsldoi(vector unsigned int __a, vector unsigned int __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
static vector float __ATTRS_o_ai
vec_vsldoi(vector float __a, vector float __b, unsigned char __c)
{
+#ifdef __LITTLE_ENDIAN__
+ return vec_perm(__a, __b, (vector unsigned char)
+ (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+ __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
}
/* vec_sll */
Modified: cfe/trunk/test/CodeGen/builtins-ppc-altivec.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-ppc-altivec.c?rev=210657&r1=210656&r2=210657&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/builtins-ppc-altivec.c (original)
+++ cfe/trunk/test/CodeGen/builtins-ppc-altivec.c Wed Jun 11 10:48:46 2014
@@ -3258,66 +3258,98 @@ void test6() {
/* vec_sld */
res_vsc = vec_sld(vsc, vsc, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vuc = vec_sld(vuc, vuc, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vs = vec_sld(vs, vs, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vus = vec_sld(vus, vus, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vp = vec_sld(vp, vp, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vi = vec_sld(vi, vi, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vui = vec_sld(vui, vui, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vf = vec_sld(vf, vf, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vsc = vec_vsldoi(vsc, vsc, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vuc = vec_vsldoi(vuc, vuc, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vs = vec_vsldoi(vs, vs, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vus = vec_vsldoi(vus, vus, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vp = vec_vsldoi(vp, vp, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vi = vec_vsldoi(vi, vi, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vui = vec_vsldoi(vui, vui, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vf = vec_vsldoi(vf, vf, 0);
// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
/* vec_sll */
More information about the cfe-commits mailing list