r214801 - [PPC64LE] Fix wrong IR for vec_sld and vec_vsldoi
Bill Schmidt
wschmidt at linux.vnet.ibm.com
Mon Aug 4 16:21:26 PDT 2014
Author: wschmidt
Date: Mon Aug 4 18:21:26 2014
New Revision: 214801
URL: http://llvm.org/viewvc/llvm-project?rev=214801&view=rev
Log:
[PPC64LE] Fix wrong IR for vec_sld and vec_vsldoi
My original LE implementation of the vsldoi instruction, with its
altivec.h interfaces vec_sld and vec_vsldoi, produces incorrect
shufflevector operations in the LLVM IR. Correct code is nevertheless
generated, because the back end compensates for the incorrect
shufflevector in a consistent manner.
This patch and a companion patch for LLVM correct this problem by
removing the fixup from altivec.h and the corresponding fixup from the
PowerPC back end. Several test cases are also modified to reflect the
now-correct LLVM IR.
The vec_sums and vec_vsumsws interfaces in altivec.h are also fixed,
because they used vec_perm calls intended to be recognized as vsldoi
instructions. These vec_perm calls are now replaced with code that
more clearly shows the intent of the transformation.
Modified:
cfe/trunk/lib/Headers/altivec.h
cfe/trunk/test/CodeGen/builtins-ppc-altivec.c
Modified: cfe/trunk/lib/Headers/altivec.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/altivec.h?rev=214801&r1=214800&r2=214801&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/altivec.h (original)
+++ cfe/trunk/lib/Headers/altivec.h Mon Aug 4 18:21:26 2014
@@ -5224,113 +5224,65 @@ vec_vslw(vector unsigned int __a, vector
static vector signed char __ATTRS_o_ai
vec_sld(vector signed char __a, vector signed char __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector unsigned char __ATTRS_o_ai
vec_sld(vector unsigned char __a, vector unsigned char __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector short __ATTRS_o_ai
vec_sld(vector short __a, vector short __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector unsigned short __ATTRS_o_ai
vec_sld(vector unsigned short __a, vector unsigned short __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector pixel __ATTRS_o_ai
vec_sld(vector pixel __a, vector pixel __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector int __ATTRS_o_ai
vec_sld(vector int __a, vector int __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector unsigned int __ATTRS_o_ai
vec_sld(vector unsigned int __a, vector unsigned int __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector float __ATTRS_o_ai
vec_sld(vector float __a, vector float __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
/* vec_vsldoi */
@@ -5338,113 +5290,65 @@ vec_sld(vector float __a, vector float _
static vector signed char __ATTRS_o_ai
vec_vsldoi(vector signed char __a, vector signed char __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector unsigned char __ATTRS_o_ai
vec_vsldoi(vector unsigned char __a, vector unsigned char __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector short __ATTRS_o_ai
vec_vsldoi(vector short __a, vector short __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector unsigned short __ATTRS_o_ai
vec_vsldoi(vector unsigned short __a, vector unsigned short __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector pixel __ATTRS_o_ai
vec_vsldoi(vector pixel __a, vector pixel __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector int __ATTRS_o_ai
vec_vsldoi(vector int __a, vector int __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector unsigned int __ATTRS_o_ai
vec_vsldoi(vector unsigned int __a, vector unsigned int __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
static vector float __ATTRS_o_ai
vec_vsldoi(vector float __a, vector float __b, unsigned char __c)
{
-#ifdef __LITTLE_ENDIAN__
- return vec_perm(__a, __b, (vector unsigned char)
- (__c, __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
- __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
return vec_perm(__a, __b, (vector unsigned char)
(__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
__c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
}
/* vec_sll */
@@ -8504,13 +8408,9 @@ static vector signed int __attribute__((
vec_sums(vector signed int __a, vector signed int __b)
{
#ifdef __LITTLE_ENDIAN__
- __b = (vector signed int)
- vec_perm(__b, __b, (vector unsigned char)
- (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+ __b = (vector signed int)vec_splat(__b, 3);
__b = __builtin_altivec_vsumsws(__a, __b);
- return (vector signed int)
- vec_perm(__b, __b, (vector unsigned char)
- (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+ return (vector signed int)(0, 0, 0, __b[0]);
#else
return __builtin_altivec_vsumsws(__a, __b);
#endif
@@ -8522,13 +8422,9 @@ static vector signed int __attribute__((
vec_vsumsws(vector signed int __a, vector signed int __b)
{
#ifdef __LITTLE_ENDIAN__
- __b = (vector signed int)
- vec_perm(__b, __b, (vector unsigned char)
- (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+ __b = (vector signed int)vec_splat(__b, 3);
__b = __builtin_altivec_vsumsws(__a, __b);
- return (vector signed int)
- vec_perm(__b, __b, (vector unsigned char)
- (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+ return (vector signed int)(0, 0, 0, __b[0]);
#else
return __builtin_altivec_vsumsws(__a, __b);
#endif
Modified: cfe/trunk/test/CodeGen/builtins-ppc-altivec.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-ppc-altivec.c?rev=214801&r1=214800&r2=214801&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/builtins-ppc-altivec.c (original)
+++ cfe/trunk/test/CodeGen/builtins-ppc-altivec.c Mon Aug 4 18:21:26 2014
@@ -3258,98 +3258,66 @@ void test6() {
/* vec_sld */
res_vsc = vec_sld(vsc, vsc, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vuc = vec_sld(vuc, vuc, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vs = vec_sld(vs, vs, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vus = vec_sld(vus, vus, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vp = vec_sld(vp, vp, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vi = vec_sld(vi, vi, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vui = vec_sld(vui, vui, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vf = vec_sld(vf, vf, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vsc = vec_vsldoi(vsc, vsc, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vuc = vec_vsldoi(vuc, vuc, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vs = vec_vsldoi(vs, vs, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vus = vec_vsldoi(vus, vus, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vp = vec_vsldoi(vp, vp, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vi = vec_vsldoi(vi, vi, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vui = vec_vsldoi(vui, vui, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
res_vf = vec_vsldoi(vf, vf, 0);
// CHECK: @llvm.ppc.altivec.vperm
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 1
-// CHECK-LE: sub nsw i32 {{[%_.a-z0-9]+}}, 15
// CHECK-LE: @llvm.ppc.altivec.vperm
/* vec_sll */
@@ -5189,13 +5157,11 @@ void test6() {
// CHECK: @llvm.ppc.altivec.vsumsws
// CHECK-LE: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vsumsws
-// CHECK-LE: @llvm.ppc.altivec.vperm
res_vi = vec_vsumsws(vi, vi);
// CHECK: @llvm.ppc.altivec.vsumsws
// CHECK-LE: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vsumsws
-// CHECK-LE: @llvm.ppc.altivec.vperm
/* vec_trunc */
res_vf = vec_trunc(vf);
More information about the cfe-commits mailing list