[clang] ed87f51 - [PowerPC] Provide some P8-specific altivec overloads for P7
Nemanja Ivanovic via cfe-commits
cfe-commits at lists.llvm.org
Thu May 6 18:26:15 PDT 2021
Sorry about the noise this caused.
I mistakenly looked at the wrong log file after running the lit tests
prior to committing the patch. I have since fixed the problem and re-committed it.
On Thu, May 6, 2021 at 10:01 AM Nico Weber <thakis at chromium.org> wrote:
> Reverted in 3761b9a2345aff197707d23a68d4a178489f60e4 for now.
>
> On Thu, May 6, 2021 at 9:58 AM Nico Weber <thakis at chromium.org> wrote:
>
>> Looks like this breaks tests: http://45.33.8.238/linux/45985/step_7.txt
>> , https://lab.llvm.org/buildbot/#/builders/139/builds/3818
>>
>> (Is there a phab link for this?)
>>
>> On Thu, May 6, 2021 at 9:37 AM Nemanja Ivanovic via cfe-commits <
>> cfe-commits at lists.llvm.org> wrote:
>>
>>>
>>> Author: Nemanja Ivanovic
>>> Date: 2021-05-06T08:37:36-05:00
>>> New Revision: ed87f512bb9eb5c1d44e9a1182ffeaf23d6c5ae8
>>>
>>> URL:
>>> https://github.com/llvm/llvm-project/commit/ed87f512bb9eb5c1d44e9a1182ffeaf23d6c5ae8
>>> DIFF:
>>> https://github.com/llvm/llvm-project/commit/ed87f512bb9eb5c1d44e9a1182ffeaf23d6c5ae8.diff
>>>
>>> LOG: [PowerPC] Provide some P8-specific altivec overloads for P7
>>>
>>> This adds additional support for XL compatibility. There are a number
>>> of functions in altivec.h that produce a single instruction (or a
>>> very short sequence) for Power8 but can be done on Power7 without
>>> scalarization. XL provides these implementations.
>>> This patch adds the following overloads for doubleword vectors:
>>> vec_add
>>> vec_cmpeq
>>> vec_cmpgt
>>> vec_cmpge
>>> vec_cmplt
>>> vec_cmple
>>> vec_sl
>>> vec_sr
>>> vec_sra
>>>
>>> Added:
>>>
>>>
>>> Modified:
>>> clang/lib/Headers/altivec.h
>>> clang/test/CodeGen/builtins-ppc-vsx.c
>>>
>>> Removed:
>>>
>>>
>>>
>>>
>>> ################################################################################
>>> diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
>>> index cb4f35caf4d4b..0441c57e3da28 100644
>>> --- a/clang/lib/Headers/altivec.h
>>> +++ b/clang/lib/Headers/altivec.h
>>> @@ -309,6 +309,26 @@ static __inline__ vector unsigned char
>>> __attribute__((__always_inline__))
>>> vec_add_u128(vector unsigned char __a, vector unsigned char __b) {
>>> return __builtin_altivec_vadduqm(__a, __b);
>>> }
>>> +#elif defined(__VSX__)
>>> +static __inline__ vector signed long long __ATTRS_o_ai
>>> +vec_add(vector signed long long __a, vector signed long long __b) {
>>> + vector unsigned int __res =
>>> + (vector unsigned int)__a + (vector unsigned int)__b;
>>> + vector unsigned int __carry = __builtin_altivec_vaddcuw(
>>> + (vector unsigned int)__a, (vector unsigned int)__b);
>>> +#ifdef __LITTLE_ENDIAN__
>>> + __carry = __builtin_shufflevector(__carry, __carry, 3, 0, 1, 2);
>>> +#else
>>> + __carry = __builtin_shufflevector(__carry, __carry, 1, 2, 3, 0);
>>> +#endif
>>> + return (vector signed long long)(__res + __carry);
>>> +}
>>> +
>>> +static __inline__ vector unsigned long long __ATTRS_o_ai
>>> +vec_add(vector unsigned long long __a, vector unsigned long long __b) {
>>> + return (vector unsigned long long)vec_add((vector signed long
>>> long)__a,
>>> + (vector signed long
>>> long)__b);
>>> +}
>>> #endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__)
>>>
>>> static __inline__ vector float __ATTRS_o_ai vec_add(vector float __a,
>>> @@ -1730,7 +1750,31 @@ vec_cmpeq(vector bool long long __a, vector bool
>>> long long __b) {
>>> return (vector bool long long)__builtin_altivec_vcmpequd(
>>> (vector long long)__a, (vector long long)__b);
>>> }
>>> +#else
>>> +static __inline__ vector bool long long __ATTRS_o_ai
>>> +vec_cmpeq(vector signed long long __a, vector signed long long __b) {
>>> + vector bool int __wordcmp =
>>> + vec_cmpeq((vector signed int)__a, (vector signed int)__b);
>>> +#ifdef __LITTLE_ENDIAN__
>>> + __wordcmp &= __builtin_shufflevector(__wordcmp, __wordcmp, 3, 0, 1,
>>> 2);
>>> + return (vector bool long long)__builtin_shufflevector(__wordcmp,
>>> __wordcmp, 1,
>>> + 1, 3, 3);
>>> +#else
>>> + __wordcmp &= __builtin_shufflevector(__wordcmp, __wordcmp, 1, 2, 3,
>>> 0);
>>> + return (vector bool long long)__builtin_shufflevector(__wordcmp,
>>> __wordcmp, 0,
>>> + 0, 2, 2);
>>> +#endif
>>> +}
>>>
>>> +static __inline__ vector bool long long __ATTRS_o_ai
>>> +vec_cmpeq(vector unsigned long long __a, vector unsigned long long __b)
>>> {
>>> + return vec_cmpeq((vector signed long long)__a, (vector signed long
>>> long)__b);
>>> +}
>>> +
>>> +static __inline__ vector bool long long __ATTRS_o_ai
>>> +vec_cmpeq(vector bool long long __a, vector bool long long __b) {
>>> + return vec_cmpeq((vector signed long long)__a, (vector signed long
>>> long)__b);
>>> +}
>>> #endif
>>>
>>> static __inline__ vector bool int __ATTRS_o_ai vec_cmpeq(vector float
>>> __a,
>>> @@ -2018,6 +2062,24 @@ vec_cmpne(vector unsigned long long __a, vector
>>> unsigned long long __b) {
>>> return (vector bool long long)
>>> ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long
>>> long)__b));
>>> }
>>> +#else
>>> +static __inline__ vector bool long long __ATTRS_o_ai
>>> +vec_cmpne(vector bool long long __a, vector bool long long __b) {
>>> + return (vector bool long long)~(
>>> + vec_cmpeq((vector signed long long)__a, (vector signed long
>>> long)__b));
>>> +}
>>> +
>>> +static __inline__ vector bool long long __ATTRS_o_ai
>>> +vec_cmpne(vector signed long long __a, vector signed long long __b) {
>>> + return (vector bool long long)~(
>>> + vec_cmpeq((vector signed long long)__a, (vector signed long
>>> long)__b));
>>> +}
>>> +
>>> +static __inline__ vector bool long long __ATTRS_o_ai
>>> +vec_cmpne(vector unsigned long long __a, vector unsigned long long __b)
>>> {
>>> + return (vector bool long long)~(
>>> + vec_cmpeq((vector signed long long)__a, (vector signed long
>>> long)__b));
>>> +}
>>> #endif
>>>
>>> #ifdef __VSX__
>>> @@ -2070,6 +2132,46 @@ static __inline__ vector bool long long
>>> __ATTRS_o_ai
>>> vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b)
>>> {
>>> return (vector bool long long)__builtin_altivec_vcmpgtud(__a, __b);
>>> }
>>> +#else
>>> +static __inline__ vector bool long long __ATTRS_o_ai
>>> +vec_cmpgt(vector signed long long __a, vector signed long long __b) {
>>> + vector signed int __sgtw =
>>> + vec_cmpgt((vector signed int)__a, (vector signed int)__b);
>>> + vector unsigned int __ugtw =
>>> + vec_cmpgt((vector unsigned int)__a, (vector unsigned int)__b);
>>> + vector bool int __eqw =
>>> + vec_cmpeq((vector signed int)__a, (vector signed int)__b);
>>> +#ifdef __LITTLE_ENDIAN__
>>> + __ugtw = __builtin_shufflevector(__ugtw, __ugtw, 3, 0, 1, 2) & __eqw;
>>> + __sgtw |= (vector signed int)__ugtw;
>>> + return (vector bool long long)__builtin_shufflevector(__sgtw, __sgtw,
>>> 1, 1, 3,
>>> + 3);
>>> +#else
>>> + __ugtw = __builtin_shufflevector(__ugtw, __ugtw, 1, 2, 3, 0) & __eqw;
>>> + __sgtw |= (vector signed int)__ugtw;
>>> + return (vector bool long long)__builtin_shufflevector(__sgtw, __sgtw,
>>> 0, 0, 2,
>>> + 2);
>>> +#endif
>>> +}
>>> +
>>> +static __inline__ vector bool long long __ATTRS_o_ai
>>> +vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b)
>>> {
>>> + vector unsigned int __ugtw =
>>> + vec_cmpgt((vector unsigned int)__a, (vector unsigned int)__b);
>>> + vector bool int __eqw =
>>> + vec_cmpeq((vector signed int)__a, (vector signed int)__b);
>>> +#ifdef __LITTLE_ENDIAN__
>>> + __eqw = __builtin_shufflevector(__ugtw, __ugtw, 3, 0, 1, 2) & __eqw;
>>> + __ugtw |= __eqw;
>>> + return (vector bool long long)__builtin_shufflevector(__ugtw, __ugtw,
>>> 1, 1, 3,
>>> + 3);
>>> +#else
>>> + __eqw = __builtin_shufflevector(__ugtw, __ugtw, 1, 2, 3, 0) & __eqw;
>>> + __ugtw |= __eqw;
>>> + return (vector bool long long)__builtin_shufflevector(__ugtw, __ugtw,
>>> 0, 0, 2,
>>> + 2);
>>> +#endif
>>> +}
>>> #endif
>>>
>>> static __inline__ vector bool int __ATTRS_o_ai vec_cmpgt(vector float
>>> __a,
>>> @@ -2150,7 +2252,6 @@ vec_cmpge(vector double __a, vector double __b) {
>>> }
>>> #endif
>>>
>>> -#ifdef __POWER8_VECTOR__
>>> static __inline__ vector bool long long __ATTRS_o_ai
>>> vec_cmpge(vector signed long long __a, vector signed long long __b) {
>>> return ~(vec_cmpgt(__b, __a));
>>> @@ -2160,7 +2261,6 @@ static __inline__ vector bool long long
>>> __ATTRS_o_ai
>>> vec_cmpge(vector unsigned long long __a, vector unsigned long long __b)
>>> {
>>> return ~(vec_cmpgt(__b, __a));
>>> }
>>> -#endif
>>>
>>> #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__)
>>> static __inline__ vector bool __int128 __ATTRS_o_ai
>>> @@ -2274,7 +2374,6 @@ vec_cmple(vector double __a, vector double __b) {
>>> }
>>> #endif
>>>
>>> -#ifdef __POWER8_VECTOR__
>>> static __inline__ vector bool long long __ATTRS_o_ai
>>> vec_cmple(vector signed long long __a, vector signed long long __b) {
>>> return vec_cmpge(__b, __a);
>>> @@ -2284,7 +2383,6 @@ static __inline__ vector bool long long
>>> __ATTRS_o_ai
>>> vec_cmple(vector unsigned long long __a, vector unsigned long long __b)
>>> {
>>> return vec_cmpge(__b, __a);
>>> }
>>> -#endif
>>>
>>> #if defined(__POWER10_VECTOR__) && defined(__SIZEOF_INT128__)
>>> static __inline__ vector bool __int128 __ATTRS_o_ai
>>> @@ -2354,7 +2452,6 @@ vec_cmplt(vector unsigned __int128 __a, vector
>>> unsigned __int128 __b) {
>>> }
>>> #endif
>>>
>>> -#ifdef __POWER8_VECTOR__
>>> static __inline__ vector bool long long __ATTRS_o_ai
>>> vec_cmplt(vector signed long long __a, vector signed long long __b) {
>>> return vec_cmpgt(__b, __a);
>>> @@ -2365,6 +2462,7 @@ vec_cmplt(vector unsigned long long __a, vector
>>> unsigned long long __b) {
>>> return vec_cmpgt(__b, __a);
>>> }
>>>
>>> +#ifdef __POWER8_VECTOR__
>>> /* vec_popcnt */
>>>
>>> static __inline__ vector signed char __ATTRS_o_ai
>>> @@ -8725,6 +8823,46 @@ vec_sl(vector unsigned long long __a, vector
>>> unsigned long long __b) {
>>> __CHAR_BIT__));
>>> }
>>>
>>> +static __inline__ vector long long __ATTRS_o_ai
>>> +vec_sl(vector long long __a, vector unsigned long long __b) {
>>> + return (vector long long)vec_sl((vector unsigned long long)__a, __b);
>>> +}
>>> +#else
>>> +static __inline__ vector unsigned char __ATTRS_o_ai
>>> +vec_vspltb(vector unsigned char __a, unsigned char __b);
>>> +static __inline__ vector unsigned long long __ATTRS_o_ai
>>> +vec_sl(vector unsigned long long __a, vector unsigned long long __b) {
>>> + __b %= (vector unsigned long long)(sizeof(unsigned long long) *
>>> __CHAR_BIT__);
>>> +
>>> + // Big endian element one (the right doubleword) can be left shifted
>>> as-is.
>>> + // The other element needs to be swapped into the right doubleword and
>>> + // shifted. Then the right doublewords of the two result vectors are
>>> merged.
>>> + vector signed long long __rightelt =
>>> + (vector signed long long)__builtin_altivec_vslo((vector signed
>>> int)__a,
>>> + (vector signed
>>> int)__b);
>>> +#ifdef __LITTLE_ENDIAN__
>>> + __rightelt = (vector signed long long)__builtin_altivec_vsl(
>>> + (vector signed int)__rightelt, vec_vspltb((vector unsigned
>>> char)__b, 0));
>>> +#else
>>> + __rightelt = (vector signed long long)__builtin_altivec_vsl(
>>> + (vector signed int)__rightelt, vec_vspltb((vector unsigned
>>> char)__b, 15));
>>> +#endif
>>> + __a = __builtin_shufflevector(__a, __a, 1, 0);
>>> + __b = __builtin_shufflevector(__b, __b, 1, 0);
>>> + vector signed long long __leftelt =
>>> + (vector signed long long)__builtin_altivec_vslo((vector signed
>>> int)__a,
>>> + (vector signed
>>> int)__b);
>>> +#ifdef __LITTLE_ENDIAN__
>>> + __leftelt = (vector signed long long)__builtin_altivec_vsl(
>>> + (vector signed int)__leftelt, vec_vspltb((vector unsigned
>>> char)__b, 0));
>>> + return __builtin_shufflevector(__rightelt, __leftelt, 0, 2);
>>> +#else
>>> + __leftelt = (vector signed long long)__builtin_altivec_vsl(
>>> + (vector signed int)__leftelt, vec_vspltb((vector unsigned
>>> char)__b, 15));
>>> + return __builtin_shufflevector(__leftelt, __rightelt, 1, 3);
>>> +#endif
>>> +}
>>> +
>>> static __inline__ vector long long __ATTRS_o_ai
>>> vec_sl(vector long long __a, vector unsigned long long __b) {
>>> return (vector long long)vec_sl((vector unsigned long long)__a, __b);
>>> @@ -10190,6 +10328,47 @@ vec_sr(vector unsigned long long __a, vector
>>> unsigned long long __b) {
>>> __CHAR_BIT__));
>>> }
>>>
>>> +static __inline__ vector long long __ATTRS_o_ai
>>> +vec_sr(vector long long __a, vector unsigned long long __b) {
>>> + return (vector long long)vec_sr((vector unsigned long long)__a, __b);
>>> +}
>>> +#else
>>> +static __inline__ vector unsigned long long __ATTRS_o_ai
>>> +vec_sr(vector unsigned long long __a, vector unsigned long long __b) {
>>> + __b %= (vector unsigned long long)(sizeof(unsigned long long) *
>>> __CHAR_BIT__);
>>> +
>>> + // Big endian element zero (the left doubleword) can be right shifted
>>> as-is.
>>> + // However the shift amount must be in the right doubleword.
>>> + // The other element needs to be swapped into the left doubleword and
>>> + // shifted. Then the left doublewords of the two result vectors are
>>> merged.
>>> + vector signed long long __swapshift = __builtin_shufflevector(__b,
>>> __b, 1, 0);
>>> + vector signed long long __leftelt =
>>> + (vector signed long long)__builtin_altivec_vsro((vector signed
>>> int)__a,
>>> + __swapshift);
>>> +#ifdef __LITTLE_ENDIAN__
>>> + __leftelt = (vector signed long long)__builtin_altivec_vsr(
>>> + (vector signed int)__leftelt,
>>> + vec_vspltb((vector unsigned char)__swapshift, 0));
>>> +#else
>>> + __leftelt = (vector signed long long)__builtin_altivec_vsr(
>>> + (vector signed int)__leftelt,
>>> + vec_vspltb((vector unsigned char)__swapshift, 15));
>>> +#endif
>>> + __a = __builtin_shufflevector(__a, __a, 1, 0);
>>> + vector signed long long __rightelt =
>>> + (vector signed long long)__builtin_altivec_vsro((vector signed
>>> int)__a,
>>> + (vector signed
>>> int)__b);
>>> +#ifdef __LITTLE_ENDIAN__
>>> + __rightelt = (vector signed long long)__builtin_altivec_vsr(
>>> + (vector signed int)__rightelt, vec_vspltb((vector unsigned
>>> char)__b, 0));
>>> + return __builtin_shufflevector(__rightelt, __leftelt, 1, 3);
>>> +#else
>>> + __rightelt = (vector signed long long)__builtin_altivec_vsr(
>>> + (vector signed int)__rightelt, vec_vspltb((vector unsigned
>>> char)__b, 15));
>>> + return __builtin_shufflevector(__leftelt, __rightelt, 0, 2);
>>> +#endif
>>> +}
>>> +
>>> static __inline__ vector long long __ATTRS_o_ai
>>> vec_sr(vector long long __a, vector unsigned long long __b) {
>>> return (vector long long)vec_sr((vector unsigned long long)__a, __b);
>>> @@ -10280,6 +10459,18 @@ static __inline__ vector unsigned long long
>>> __ATTRS_o_ai
>>> vec_sra(vector unsigned long long __a, vector unsigned long long __b) {
>>> return (vector unsigned long long)((vector signed long long)__a >>
>>> __b);
>>> }
>>> +#else
>>> +static __inline__ vector signed long long __ATTRS_o_ai
>>> +vec_sra(vector signed long long __a, vector unsigned long long __b) {
>>> + __b %= (vector unsigned long long)(sizeof(unsigned long long) *
>>> __CHAR_BIT__);
>>> + return __a >> __b;
>>> +}
>>> +
>>> +static __inline__ vector unsigned long long __ATTRS_o_ai
>>> +vec_sra(vector unsigned long long __a, vector unsigned long long __b) {
>>> + __b %= (vector unsigned long long)(sizeof(unsigned long long) *
>>> __CHAR_BIT__);
>>> + return (vector unsigned long long)((vector signed long long)__a >>
>>> __b);
>>> +}
>>> #endif
>>>
>>> /* vec_vsrab */
>>>
>>> diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c
>>> b/clang/test/CodeGen/builtins-ppc-vsx.c
>>> index ecae9a620e426..4bb7a05a435a2 100644
>>> --- a/clang/test/CodeGen/builtins-ppc-vsx.c
>>> +++ b/clang/test/CodeGen/builtins-ppc-vsx.c
>>> @@ -2313,3 +2313,282 @@ vector double test_rsqrtd(vector double a,
>>> vector double b) {
>>> // CHECK-LE: fdiv fast <2 x double> <double 1.000000e+00, double
>>> 1.000000e+00>
>>> return vec_rsqrt(a);
>>> }
>>> +
>>> +void test_p8overloads_backwards_compat() {
>>> + // CHECK-LABEL: test_p8overloads_backwards_compat
>>> + res_vsll = vec_add(vsll, vsll);
>>> + // CHECK: add <4 x i32>
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vaddcuw
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 2, i32 3, i32 0>
>>> + // CHECK: add <4 x i32>
>>> + // CHECK-LE: add <4 x i32>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vaddcuw
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 3, i32 0, i32 1, i32 2>
>>> + // CHECK-LE: add <4 x i32>
>>> + res_vull = vec_add(vull, vull);
>>> + // CHECK: add <4 x i32>
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vaddcuw
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 2, i32 3, i32 0>
>>> + // CHECK: add <4 x i32>
>>> + // CHECK-LE: add <4 x i32>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vaddcuw
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 3, i32 0, i32 1, i32 2>
>>> + // CHECK-LE: add <4 x i32>
>>> + dummy();
>>> + // CHECK: call void @dummy()
>>> + // CHECK-LE: call void @dummy()
>>> +
>>> + res_vbll = vec_cmpeq(vsll, vsll);
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 2, i32 3, i32 0>
>>> + // CHECK: and <4 x i32>
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 0, i32 0, i32 2, i32 2>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 3, i32 0, i32 1, i32 2>
>>> + // CHECK-LE: and <4 x i32>
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 1, i32 3, i32 3>
>>> + res_vbll = vec_cmpeq(vull, vull);
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 2, i32 3, i32 0>
>>> + // CHECK: and <4 x i32>
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 0, i32 0, i32 2, i32 2>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 3, i32 0, i32 1, i32 2>
>>> + // CHECK-LE: and <4 x i32>
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 1, i32 3, i32 3>
>>> + res_vbll = vec_cmpeq(vbll, vbll);
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 2, i32 3, i32 0>
>>> + // CHECK: and <4 x i32>
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 0, i32 0, i32 2, i32 2>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 3, i32 0, i32 1, i32 2>
>>> + // CHECK-LE: and <4 x i32>
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 1, i32 3, i32 3>
>>> + dummy();
>>> + // CHECK: call void @dummy()
>>> + // CHECK-LE: call void @dummy()
>>> +
>>> + res_vbll = vec_cmpgt(vsll, vsll);
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 2, i32 3, i32 0>
>>> + // CHECK: and <4 x i32>
>>> + // CHECK: or <4 x i32>
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 0, i32 0, i32 2, i32 2>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 3, i32 0, i32 1, i32 2>
>>> + // CHECK-LE: and <4 x i32>
>>> + // CHECK-LE: or <4 x i32>
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 1, i32 3, i32 3>
>>> + res_vbll = vec_cmpgt(vull, vull);
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 2, i32 3, i32 0>
>>> + // CHECK: and <4 x i32>
>>> + // CHECK: or <4 x i32>
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 0, i32 0, i32 2, i32 2>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 3, i32 0, i32 1, i32 2>
>>> + // CHECK-LE: and <4 x i32>
>>> + // CHECK-LE: or <4 x i32>
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 1, i32 3, i32 3>
>>> + dummy();
>>> + // CHECK: call void @dummy()
>>> + // CHECK-LE: call void @dummy()
>>> +
>>> + res_vbll = vec_cmpge(vsll, vsll);
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 2, i32 3, i32 0>
>>> + // CHECK: and <4 x i32>
>>> + // CHECK: or <4 x i32>
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 0, i32 0, i32 2, i32 2>
>>> + // CHECK: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 3, i32 0, i32 1, i32 2>
>>> + // CHECK-LE: and <4 x i32>
>>> + // CHECK-LE: or <4 x i32>
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 1, i32 3, i32 3>
>>> + // CHECK-LE: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
>>> + res_vbll = vec_cmpge(vull, vull);
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 2, i32 3, i32 0>
>>> + // CHECK: and <4 x i32>
>>> + // CHECK: or <4 x i32>
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 0, i32 0, i32 2, i32 2>
>>> + // CHECK: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 3, i32 0, i32 1, i32 2>
>>> + // CHECK-LE: and <4 x i32>
>>> + // CHECK-LE: or <4 x i32>
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 1, i32 3, i32 3>
>>> + // CHECK-LE: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
>>> + dummy();
>>> + // CHECK: call void @dummy()
>>> + // CHECK-LE: call void @dummy()
>>> +
>>> + res_vbll = vec_cmplt(vsll, vsll);
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 2, i32 3, i32 0>
>>> + // CHECK: and <4 x i32>
>>> + // CHECK: or <4 x i32>
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 0, i32 0, i32 2, i32 2>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 3, i32 0, i32 1, i32 2>
>>> + // CHECK-LE: and <4 x i32>
>>> + // CHECK-LE: or <4 x i32>
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 1, i32 3, i32 3>
>>> + res_vbll = vec_cmplt(vull, vull);
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 2, i32 3, i32 0>
>>> + // CHECK: and <4 x i32>
>>> + // CHECK: or <4 x i32>
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 0, i32 0, i32 2, i32 2>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 3, i32 0, i32 1, i32 2>
>>> + // CHECK-LE: and <4 x i32>
>>> + // CHECK-LE: or <4 x i32>
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 1, i32 3, i32 3>
>>> + dummy();
>>> + // CHECK: call void @dummy()
>>> + // CHECK-LE: call void @dummy()
>>> +
>>> + res_vbll = vec_cmple(vsll, vsll);
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 2, i32 3, i32 0>
>>> + // CHECK: and <4 x i32>
>>> + // CHECK: or <4 x i32>
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 0, i32 0, i32 2, i32 2>
>>> + // CHECK: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 3, i32 0, i32 1, i32 2>
>>> + // CHECK-LE: and <4 x i32>
>>> + // CHECK-LE: or <4 x i32>
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 1, i32 3, i32 3>
>>> + // CHECK-LE: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
>>> + res_vbll = vec_cmple(vull, vull);
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 2, i32 3, i32 0>
>>> + // CHECK: and <4 x i32>
>>> + // CHECK: or <4 x i32>
>>> + // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 0, i32 0, i32 2, i32 2>
>>> + // CHECK: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 3, i32 0, i32 1, i32 2>
>>> + // CHECK-LE: and <4 x i32>
>>> + // CHECK-LE: or <4 x i32>
>>> + // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x
>>> i32> <i32 1, i32 1, i32 3, i32 3>
>>> + // CHECK-LE: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
>>> + dummy();
>>> + // CHECK: call void @dummy()
>>> + // CHECK-LE: call void @dummy()
>>> +
>>> + res_vsll = vec_sl(vsll, vull);
>>> + // CHECK: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl
>>> + // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl
>>> + // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 3>
>>> + // CHECK-LE: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vslo
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsl
>>> + // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vslo
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsl
>>> + // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 0, i32 2>
>>> + res_vull = vec_sl(vull, vull);
>>> + // CHECK: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl
>>> + // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl
>>> + // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 3>
>>> + // CHECK-LE: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vslo
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsl
>>> + // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vslo
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsl
>>> + // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 0, i32 2>
>>> + dummy();
>>> + // CHECK: call void @dummy()
>>> + // CHECK-LE: call void @dummy()
>>> +
>>> + res_vsll = vec_sr(vsll, vull);
>>> + // CHECK: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
>>> + // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr
>>> + // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr
>>> + // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 0, i32 2>
>>> + // CHECK-LE: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
>>> + // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsro
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsr
>>> + // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsro
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsr
>>> + // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 3>
>>> + res_vull = vec_sr(vull, vull);
>>> + // CHECK: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
>>> + // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr
>>> + // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro
>>> + // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr
>>> + // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 0, i32 2>
>>> + // CHECK-LE: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
>>> + // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsro
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsr
>>> + // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 0>
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsro
>>> + // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsr
>>> + // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x
>>> i32> <i32 1, i32 3>
>>> + dummy();
>>> + // CHECK: call void @dummy()
>>> + // CHECK-LE: call void @dummy()
>>> +
>>> + res_vsll = vec_sra(vsll, vull);
>>> + // CHECK: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
>>> + // CHECK: ashr <2 x i64>
>>> + // CHECK-LE: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
>>> + // CHECK-LE: ashr <2 x i64>
>>> + res_vull = vec_sra(vull, vull);
>>> + // CHECK: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
>>> + // CHECK: ashr <2 x i64>
>>> + // CHECK-LE: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
>>> + // CHECK-LE: ashr <2 x i64>
>>> +}
>>>
>>>
>>>
>>> _______________________________________________
>>> cfe-commits mailing list
>>> cfe-commits at lists.llvm.org
>>> https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>>>
>>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20210506/06bfd8c1/attachment-0001.html>
More information about the cfe-commits
mailing list