r242297 - [PPC64LE] Fix vec_sld semantics for little endian
Hal Finkel
hfinkel at anl.gov
Wed Jul 15 08:57:51 PDT 2015
Is there a test case for this change?
-Hal
----- Original Message -----
> From: "Bill Schmidt" <wschmidt at linux.vnet.ibm.com>
> To: cfe-commits at cs.uiuc.edu
> Sent: Wednesday, July 15, 2015 10:45:53 AM
> Subject: r242297 - [PPC64LE] Fix vec_sld semantics for little endian
>
> Author: wschmidt
> Date: Wed Jul 15 10:45:53 2015
> New Revision: 242297
>
> URL: http://llvm.org/viewvc/llvm-project?rev=242297&view=rev
> Log:
> [PPC64LE] Fix vec_sld semantics for little endian
>
> The vec_sld interface provides access to the vsldoi instruction.
> Unlike most of the vec_* interfaces, we do not attempt to change the
> generated code for vec_sld based on the endian mode. It is too
> difficult to correctly infer the desired semantics because of
> different element types, and the corrected instruction sequence is
> expensive, involving loading a permute control vector and performing a
> generalized permute.
>
> For GCC, this was implemented as "Don't touch the vec_sld"
> implementation. When it came time for the LLVM implementation, I did
> the same thing. However, this was hasty and incorrect. In LLVM's
> version of altivec.h, vec_sld was previously defined in terms of the
> vec_perm interface. Because vec_perm semantics are adjusted for
> little endian, this means that leaving vec_sld untouched causes it to
> generate something different for LE than for BE. Not good.
>
> This patch adjusts the form of vec_perm that is used for vec_sld and
> vec_vsldoi, effectively undoing the modifications so that the same
> vsldoi instruction will be generated for both BE and LE.
>
> There is an accompanying back-end patch to take care of some small
> ripple effects caused by these changes.
>
> Modified:
> cfe/trunk/lib/Headers/altivec.h
>
> Modified: cfe/trunk/lib/Headers/altivec.h
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/altivec.h?rev=242297&r1=242296&r2=242297&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Headers/altivec.h (original)
> +++ cfe/trunk/lib/Headers/altivec.h Wed Jul 15 10:45:53 2015
> @@ -6563,119 +6563,218 @@ static vector signed char __ATTRS_o_ai v
> vector signed char
> __b,
> unsigned const int
> __c) {
> unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector unsigned char __ATTRS_o_ai vec_sld(vector unsigned
> char __a,
> vector unsigned
> char __b,
> unsigned const int
> __c) {
> unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector bool char __ATTRS_o_ai vec_sld(vector bool char __a,
> vector bool char __b,
> unsigned const int __c)
> {
> unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector signed short __ATTRS_o_ai vec_sld(vector signed short
> __a,
> vector signed short
> __b,
> unsigned const int
> __c) {
> unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector unsigned short __ATTRS_o_ai vec_sld(vector unsigned
> short __a,
> vector unsigned
> short __b,
> unsigned const int
> __c) {
> unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector bool short __ATTRS_o_ai vec_sld(vector bool short __a,
> vector bool short __b,
> unsigned const int
> __c) {
> unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector pixel __ATTRS_o_ai vec_sld(vector pixel __a, vector
> pixel __b,
> unsigned const int __c) {
> unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector signed int __ATTRS_o_ai vec_sld(vector signed int __a,
> vector signed int __b,
> unsigned const int
> __c) {
> unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector unsigned int __ATTRS_o_ai vec_sld(vector unsigned int
> __a,
> vector unsigned int
> __b,
> unsigned const int
> __c) {
> unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector bool int __ATTRS_o_ai vec_sld(vector bool int __a,
> vector bool int __b,
> unsigned const int __c)
> {
> unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector float __ATTRS_o_ai vec_sld(vector float __a, vector
> float __b,
> unsigned const int __c) {
> unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> /* vec_vsldoi */
> @@ -6683,77 +6782,157 @@ static vector float __ATTRS_o_ai vec_sld
> static vector signed char __ATTRS_o_ai vec_vsldoi(vector signed char
> __a,
> vector signed char
> __b,
> unsigned char __c)
> {
> + unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c +
> 4, __c + 5,
> - __c + 6, __c + 7, __c + 8, __c + 9, __c
> + 10,
> - __c + 11, __c + 12, __c + 13, __c + 14,
> __c + 15));
> + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> + __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> + __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector unsigned char __ATTRS_o_ai vec_vsldoi(vector unsigned
> char __a,
> vector unsigned
> char __b,
> unsigned char
> __c) {
> + unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c +
> 4, __c + 5,
> - __c + 6, __c + 7, __c + 8, __c + 9, __c
> + 10,
> - __c + 11, __c + 12, __c + 13, __c + 14,
> __c + 15));
> + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> + __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> + __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector short __ATTRS_o_ai vec_vsldoi(vector short __a, vector
> short __b,
> unsigned char __c) {
> + unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c +
> 4, __c + 5,
> - __c + 6, __c + 7, __c + 8, __c + 9, __c
> + 10,
> - __c + 11, __c + 12, __c + 13, __c + 14,
> __c + 15));
> + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> + __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> + __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector unsigned short __ATTRS_o_ai vec_vsldoi(vector unsigned
> short __a,
> vector unsigned
> short __b,
> unsigned char
> __c) {
> + unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c +
> 4, __c + 5,
> - __c + 6, __c + 7, __c + 8, __c + 9, __c
> + 10,
> - __c + 11, __c + 12, __c + 13, __c + 14,
> __c + 15));
> + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> + __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> + __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector pixel __ATTRS_o_ai vec_vsldoi(vector pixel __a, vector
> pixel __b,
> unsigned char __c) {
> + unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c +
> 4, __c + 5,
> - __c + 6, __c + 7, __c + 8, __c + 9, __c
> + 10,
> - __c + 11, __c + 12, __c + 13, __c + 14,
> __c + 15));
> + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> + __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> + __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector int __ATTRS_o_ai vec_vsldoi(vector int __a, vector int
> __b,
> unsigned char __c) {
> + unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c +
> 4, __c + 5,
> - __c + 6, __c + 7, __c + 8, __c + 9, __c
> + 10,
> - __c + 11, __c + 12, __c + 13, __c + 14,
> __c + 15));
> + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> + __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> + __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector unsigned int __ATTRS_o_ai vec_vsldoi(vector unsigned
> int __a,
> vector unsigned
> int __b,
> unsigned char
> __c) {
> + unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c +
> 4, __c + 5,
> - __c + 6, __c + 7, __c + 8, __c + 9, __c
> + 10,
> - __c + 11, __c + 12, __c + 13, __c + 14,
> __c + 15));
> + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> + __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> + __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> static vector float __ATTRS_o_ai vec_vsldoi(vector float __a, vector
> float __b,
> unsigned char __c) {
> + unsigned char __d = __c & 0x0F;
> +#ifdef __LITTLE_ENDIAN__
> + return vec_perm(
> + __b, __a,
> + (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
> 20 - __d,
> + 21 - __d, 22 - __d, 23 - __d, 24 - __d,
> 25 - __d,
> + 26 - __d, 27 - __d, 28 - __d, 29 - __d,
> 30 - __d,
> + 31 - __d));
> +#else
> return vec_perm(
> __a, __b,
> - (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c +
> 4, __c + 5,
> - __c + 6, __c + 7, __c + 8, __c + 9, __c
> + 10,
> - __c + 11, __c + 12, __c + 13, __c + 14,
> __c + 15));
> + (vector unsigned char)(__d, __d + 1, __d + 2, __d + 3, __d +
> 4, __d + 5,
> + __d + 6, __d + 7, __d + 8, __d + 9, __d
> + 10,
> + __d + 11, __d + 12, __d + 13, __d + 14,
> __d + 15));
> +#endif
> }
>
> /* vec_sll */
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>
--
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory
More information about the cfe-commits mailing list