r214801 - [PPC64LE] Fix wrong IR for vec_sld and vec_vsldoi

Bill Schmidt wschmidt at linux.vnet.ibm.com
Mon Aug 4 16:21:26 PDT 2014


Author: wschmidt
Date: Mon Aug  4 18:21:26 2014
New Revision: 214801

URL: http://llvm.org/viewvc/llvm-project?rev=214801&view=rev
Log:
[PPC64LE] Fix wrong IR for vec_sld and vec_vsldoi

My original LE implementation of the vsldoi instruction, with its
altivec.h interfaces vec_sld and vec_vsldoi, produces incorrect
shufflevector operations in the LLVM IR.  Correct machine code is
nevertheless generated, because the PowerPC back end contains a
corresponding fixup that handles the incorrect shufflevector in a
consistent manner.

This patch and a companion patch for LLVM correct this problem by
removing the fixup from altivec.h and the corresponding fixup from the
PowerPC back end.  Several test cases are also modified to reflect the
now-correct LLVM IR.

The vec_sums and vec_vsumsws interfaces in altivec.h are also fixed,
because they used vec_perm calls intended to be recognized as vsldoi
instructions.  These vec_perm calls are now replaced with code (a
vec_splat of the input and an explicit construction of the result
vector) that more clearly shows the intent of the transformation.


Modified:
    cfe/trunk/lib/Headers/altivec.h
    cfe/trunk/test/CodeGen/builtins-ppc-altivec.c

Modified: cfe/trunk/lib/Headers/altivec.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/altivec.h?rev=214801&r1=214800&r2=214801&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/altivec.h (original)
+++ cfe/trunk/lib/Headers/altivec.h Mon Aug  4 18:21:26 2014
@@ -5224,113 +5224,65 @@ vec_vslw(vector unsigned int __a, vector
 static vector signed char __ATTRS_o_ai
 vec_sld(vector signed char __a, vector signed char __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector unsigned char __ATTRS_o_ai
 vec_sld(vector unsigned char __a, vector unsigned char __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector short __ATTRS_o_ai
 vec_sld(vector short __a, vector short __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector unsigned short __ATTRS_o_ai
 vec_sld(vector unsigned short __a, vector unsigned short __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector pixel __ATTRS_o_ai
 vec_sld(vector pixel __a, vector pixel __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector int __ATTRS_o_ai
 vec_sld(vector int __a, vector int __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector unsigned int __ATTRS_o_ai
 vec_sld(vector unsigned int __a, vector unsigned int __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector float __ATTRS_o_ai
 vec_sld(vector float __a, vector float __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 /* vec_vsldoi */
@@ -5338,113 +5290,65 @@ vec_sld(vector float __a, vector float _
 static vector signed char __ATTRS_o_ai
 vec_vsldoi(vector signed char __a, vector signed char __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector unsigned char __ATTRS_o_ai
 vec_vsldoi(vector unsigned char __a, vector unsigned char __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector short __ATTRS_o_ai
 vec_vsldoi(vector short __a, vector short __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector unsigned short __ATTRS_o_ai
 vec_vsldoi(vector unsigned short __a, vector unsigned short __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector pixel __ATTRS_o_ai
 vec_vsldoi(vector pixel __a, vector pixel __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector int __ATTRS_o_ai
 vec_vsldoi(vector int __a, vector int __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector unsigned int __ATTRS_o_ai
 vec_vsldoi(vector unsigned int __a, vector unsigned int __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector float __ATTRS_o_ai
 vec_vsldoi(vector float __a, vector float __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 /* vec_sll */
@@ -8504,13 +8408,9 @@ static vector signed int __attribute__((
 vec_sums(vector signed int __a, vector signed int __b)
 {
 #ifdef __LITTLE_ENDIAN__
-  __b = (vector signed int)
-    vec_perm(__b, __b, (vector unsigned char)
-	     (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+  __b = (vector signed int)vec_splat(__b, 3);
   __b = __builtin_altivec_vsumsws(__a, __b);
-  return (vector signed int)
-    vec_perm(__b, __b, (vector unsigned char)
-	     (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+  return (vector signed int)(0, 0, 0, __b[0]);
 #else
   return __builtin_altivec_vsumsws(__a, __b);
 #endif
@@ -8522,13 +8422,9 @@ static vector signed int __attribute__((
 vec_vsumsws(vector signed int __a, vector signed int __b)
 {
 #ifdef __LITTLE_ENDIAN__
-  __b = (vector signed int)
-    vec_perm(__b, __b, (vector unsigned char)
-	     (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+  __b = (vector signed int)vec_splat(__b, 3);
   __b = __builtin_altivec_vsumsws(__a, __b);
-  return (vector signed int)
-    vec_perm(__b, __b, (vector unsigned char)
-	     (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+  return (vector signed int)(0, 0, 0, __b[0]);
 #else
   return __builtin_altivec_vsumsws(__a, __b);
 #endif

Modified: cfe/trunk/test/CodeGen/builtins-ppc-altivec.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-ppc-altivec.c?rev=214801&r1=214800&r2=214801&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/builtins-ppc-altivec.c (original)
+++ cfe/trunk/test/CodeGen/builtins-ppc-altivec.c Mon Aug  4 18:21:26 2014
@@ -3258,98 +3258,66 @@ void test6() {
   /* vec_sld */
   res_vsc = vec_sld(vsc, vsc, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vuc = vec_sld(vuc, vuc, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vs  = vec_sld(vs, vs, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vus = vec_sld(vus, vus, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vp  = vec_sld(vp, vp, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vi  = vec_sld(vi, vi, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vui = vec_sld(vui, vui, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vf  = vec_sld(vf, vf, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vsc = vec_vsldoi(vsc, vsc, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vuc = vec_vsldoi(vuc, vuc, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vs  = vec_vsldoi(vs, vs, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vus = vec_vsldoi(vus, vus, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vp  = vec_vsldoi(vp, vp, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vi  = vec_vsldoi(vi, vi, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vui = vec_vsldoi(vui, vui, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vf  = vec_vsldoi(vf, vf, 0);
 // CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
 // CHECK-LE: @llvm.ppc.altivec.vperm
 
   /* vec_sll */
@@ -5189,13 +5157,11 @@ void test6() {
 // CHECK: @llvm.ppc.altivec.vsumsws
 // CHECK-LE: @llvm.ppc.altivec.vperm
 // CHECK-LE: @llvm.ppc.altivec.vsumsws
-// CHECK-LE: @llvm.ppc.altivec.vperm
 
   res_vi = vec_vsumsws(vi, vi);
 // CHECK: @llvm.ppc.altivec.vsumsws
 // CHECK-LE: @llvm.ppc.altivec.vperm
 // CHECK-LE: @llvm.ppc.altivec.vsumsws
-// CHECK-LE: @llvm.ppc.altivec.vperm
 
   /* vec_trunc */
   res_vf = vec_trunc(vf);





More information about the cfe-commits mailing list