r210657 - [PPC64LE] Fix vec_sld and vec_vsldoi for little endian

Bill Schmidt wschmidt at linux.vnet.ibm.com
Wed Jun 11 08:48:46 PDT 2014


Author: wschmidt
Date: Wed Jun 11 10:48:46 2014
New Revision: 210657

URL: http://llvm.org/viewvc/llvm-project?rev=210657&view=rev
Log:
[PPC64LE] Fix vec_sld and vec_vsldoi for little endian

The vec_sld and vec_vsldoi interfaces perform a left shift on their vector
arguments for both big and little endian.  However, because they rely
on the vec_perm interface, which is endian-dependent, the permutation
vector needs to be reversed for LE to get the proper shift direction.

I've added some extra testing of these interfaces for LE in
builtins-ppc-altivec.c.

Modified:
    cfe/trunk/lib/Headers/altivec.h
    cfe/trunk/test/CodeGen/builtins-ppc-altivec.c

Modified: cfe/trunk/lib/Headers/altivec.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/altivec.h?rev=210657&r1=210656&r2=210657&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/altivec.h (original)
+++ cfe/trunk/lib/Headers/altivec.h Wed Jun 11 10:48:46 2014
@@ -5224,65 +5224,113 @@ vec_vslw(vector unsigned int __a, vector
 static vector signed char __ATTRS_o_ai
 vec_sld(vector signed char __a, vector signed char __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector unsigned char __ATTRS_o_ai
 vec_sld(vector unsigned char __a, vector unsigned char __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector short __ATTRS_o_ai
 vec_sld(vector short __a, vector short __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector unsigned short __ATTRS_o_ai
 vec_sld(vector unsigned short __a, vector unsigned short __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector pixel __ATTRS_o_ai
 vec_sld(vector pixel __a, vector pixel __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector int __ATTRS_o_ai
 vec_sld(vector int __a, vector int __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector unsigned int __ATTRS_o_ai
 vec_sld(vector unsigned int __a, vector unsigned int __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector float __ATTRS_o_ai
 vec_sld(vector float __a, vector float __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 /* vec_vsldoi */
@@ -5290,65 +5338,113 @@ vec_sld(vector float __a, vector float _
 static vector signed char __ATTRS_o_ai
 vec_vsldoi(vector signed char __a, vector signed char __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector unsigned char __ATTRS_o_ai
 vec_vsldoi(vector unsigned char __a, vector unsigned char __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector short __ATTRS_o_ai
 vec_vsldoi(vector short __a, vector short __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector unsigned short __ATTRS_o_ai
 vec_vsldoi(vector unsigned short __a, vector unsigned short __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector pixel __ATTRS_o_ai
 vec_vsldoi(vector pixel __a, vector pixel __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector int __ATTRS_o_ai
 vec_vsldoi(vector int __a, vector int __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector unsigned int __ATTRS_o_ai
 vec_vsldoi(vector unsigned int __a, vector unsigned int __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 static vector float __ATTRS_o_ai
 vec_vsldoi(vector float __a, vector float __b, unsigned char __c)
 {
+#ifdef __LITTLE_ENDIAN__
+  return vec_perm(__a, __b, (vector unsigned char)
+    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
+     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
+#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+#endif
 }
 
 /* vec_sll */

Modified: cfe/trunk/test/CodeGen/builtins-ppc-altivec.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-ppc-altivec.c?rev=210657&r1=210656&r2=210657&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/builtins-ppc-altivec.c (original)
+++ cfe/trunk/test/CodeGen/builtins-ppc-altivec.c Wed Jun 11 10:48:46 2014
@@ -3258,66 +3258,98 @@ void test6() {
   /* vec_sld */
   res_vsc = vec_sld(vsc, vsc, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vuc = vec_sld(vuc, vuc, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vs  = vec_sld(vs, vs, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vus = vec_sld(vus, vus, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vp  = vec_sld(vp, vp, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vi  = vec_sld(vi, vi, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vui = vec_sld(vui, vui, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vf  = vec_sld(vf, vf, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vsc = vec_vsldoi(vsc, vsc, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vuc = vec_vsldoi(vuc, vuc, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vs  = vec_vsldoi(vs, vs, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vus = vec_vsldoi(vus, vus, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vp  = vec_vsldoi(vp, vp, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vi  = vec_vsldoi(vi, vi, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vui = vec_vsldoi(vui, vui, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vf  = vec_vsldoi(vf, vf, 0);
 // CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
+// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   /* vec_sll */





More information about the cfe-commits mailing list