r210340 - [PPC64LE] Implement little-endian semantics for vec_pack family

Hal Finkel hfinkel at anl.gov
Fri Jun 6 09:33:47 PDT 2014


----- Original Message -----
> From: "Bill Schmidt" <wschmidt at linux.vnet.ibm.com>
> To: "Hal Finkel" <hfinkel at anl.gov>
> Cc: cfe-commits at cs.uiuc.edu
> Sent: Friday, June 6, 2014 11:32:29 AM
> Subject: Re: r210340 - [PPC64LE] Implement little-endian semantics for vec_pack family
> 
> Hm, that probably isn't true.  I suppose the test will "pass" by
> producing the wrong (big-endian) code generation, so this is a
> reasonable approach.
> 
> I'll look into it.

Okay, thanks. Having the tests with the commits is almost always better, so if it is feasible, then I'd prefer it.

Thanks again,
Hal

> 
> Bill
> 
> On Fri, 2014-06-06 at 11:07 -0500, Hal Finkel wrote:
> > ----- Original Message -----
> > > From: "Bill Schmidt" <wschmidt at linux.vnet.ibm.com>
> > > To: cfe-commits at cs.uiuc.edu
> > > Sent: Friday, June 6, 2014 10:10:47 AM
> > > Subject: r210340 - [PPC64LE] Implement little-endian semantics
> > > for vec_pack family
> > > 
> > > Author: wschmidt
> > > Date: Fri Jun  6 10:10:47 2014
> > > New Revision: 210340
> > > 
> > > URL: http://llvm.org/viewvc/llvm-project?rev=210340&view=rev
> > > Log:
> > > [PPC64LE] Implement little-endian semantics for vec_pack family
> > > 
> > > The PowerPC vector-pack instructions are defined architecturally
> > > with a big-endian bias, in that the vector element numbering is
> > > assumed to be "left to right" regardless of whether the processor
> > > is in big-endian or little-endian mode.  This definition is
> > > unnatural for little-endian code generation.
> > > 
> > > To facilitate ease of porting, the vec_pack and related
> > > interfaces are designed to use natural element ordering, so that
> > > elements are numbered according to little-endian design
> > > principles when code is generated for a little-endian target.
> > > The vec_pack calls are implemented as calls to vec_perm,
> > > specifying selection of the odd-numbered vector elements.  For
> > > little endian, this means the odd-numbered elements counting from
> > > the right end of the register.  Since the underlying instructions
> > > count from the left end, we must instead select the even-numbered
> > > vector elements for little endian to achieve the desired
> > > semantics.
> > > 
> > > The correctness of this code is tested by the new pack.c test
> > > added in a previous patch.  I plan to later make the existing
> > > ppc32 Altivec compile-time tests work for ppc64 and ppc64le as
> > > well.
> > 
> > I don't understand this...
> > 
> > test/CodeGen/builtins-ppc-altivec.c seems to be the relevant place
> > to test this, and this test works fine on ppc64 (if you change the
> > current triple from powerpc-unknown-unknown to
> > powerpc64-unknown-unknown it still passes). Please do this and add
> > tests for these changes there.
> > 
> > Thanks again,
> > Hal
> > 
> > > 
> > > Modified:
> > >     cfe/trunk/lib/Headers/altivec.h
> > > 
> > > Modified: cfe/trunk/lib/Headers/altivec.h
> > > URL:
> > > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/altivec.h?rev=210340&r1=210339&r2=210340&view=diff
> > > ==============================================================================
> > > --- cfe/trunk/lib/Headers/altivec.h (original)
> > > +++ cfe/trunk/lib/Headers/altivec.h Fri Jun  6 10:10:47 2014
> > > @@ -4117,52 +4117,91 @@ vec_vor(vector float __a, vector bool in
> > >  
> > >  /* vec_pack */
> > >  
> > > +/* The various vector pack instructions have a big-endian bias,
> > > so
> > > for
> > > +   little endian we must handle reversed element numbering.  */
> > > +
> > >  static vector signed char __ATTRS_o_ai
> > >  vec_pack(vector signed short __a, vector signed short __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector signed char)vec_perm(__a, __b, (vector unsigned
> > > char)
> > > +    (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
> > > +     0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
> > > +#else
> > >    return (vector signed char)vec_perm(__a, __b, (vector unsigned
> > >    char)
> > >      (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
> > >       0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
> > > +#endif
> > >  }
> > >  
> > >  static vector unsigned char __ATTRS_o_ai
> > >  vec_pack(vector unsigned short __a, vector unsigned short __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector unsigned char)vec_perm(__a, __b, (vector
> > > unsigned
> > > char)
> > > +    (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
> > > +     0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
> > > +#else
> > >    return (vector unsigned char)vec_perm(__a, __b, (vector
> > >    unsigned
> > >    char)
> > >      (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
> > >       0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
> > > +#endif
> > >  }
> > >  
> > >  static vector bool char __ATTRS_o_ai
> > >  vec_pack(vector bool short __a, vector bool short __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector bool char)vec_perm(__a, __b, (vector unsigned
> > > char)
> > > +    (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
> > > +     0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
> > > +#else
> > >    return (vector bool char)vec_perm(__a, __b, (vector unsigned
> > >    char)
> > >      (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
> > >       0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
> > > +#endif
> > >  }
> > >  
> > >  static vector short __ATTRS_o_ai
> > >  vec_pack(vector int __a, vector int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector short)vec_perm(__a, __b, (vector unsigned char)
> > > +    (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
> > > +     0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
> > > +#else
> > >    return (vector short)vec_perm(__a, __b, (vector unsigned char)
> > >      (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
> > >       0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
> > > +#endif
> > >  }
> > >  
> > >  static vector unsigned short __ATTRS_o_ai
> > >  vec_pack(vector unsigned int __a, vector unsigned int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector unsigned short)vec_perm(__a, __b, (vector
> > > unsigned
> > > char)
> > > +    (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
> > > +     0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
> > > +#else
> > >    return (vector unsigned short)vec_perm(__a, __b, (vector
> > >    unsigned
> > >    char)
> > >      (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
> > >       0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
> > > +#endif
> > >  }
> > >  
> > >  static vector bool short __ATTRS_o_ai
> > >  vec_pack(vector bool int __a, vector bool int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector unsigned short)vec_perm(__a, __b, (vector
> > > unsigned
> > > char)
> > > +    (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
> > > +     0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
> > > +#else
> > >    return (vector bool short)vec_perm(__a, __b, (vector unsigned
> > >    char)
> > >      (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
> > >       0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
> > > +#endif
> > >  }
> > >  
> > >  /* vec_vpkuhum */
> > > @@ -4172,25 +4211,43 @@ vec_pack(vector bool int __a, vector boo
> > >  static vector signed char __ATTRS_o_ai
> > >  vec_vpkuhum(vector signed short __a, vector signed short __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector signed char)vec_perm(__a, __b, (vector unsigned
> > > char)
> > > +    (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
> > > +     0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
> > > +#else
> > >    return (vector signed char)vec_perm(__a, __b, (vector unsigned
> > >    char)
> > >      (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
> > >       0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
> > > +#endif
> > >  }
> > >  
> > >  static vector unsigned char __ATTRS_o_ai
> > >  vec_vpkuhum(vector unsigned short __a, vector unsigned short
> > >  __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector unsigned char)vec_perm(__a, __b, (vector
> > > unsigned
> > > char)
> > > +    (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
> > > +     0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
> > > +#else
> > >    return (vector unsigned char)vec_perm(__a, __b, (vector
> > >    unsigned
> > >    char)
> > >      (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
> > >       0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
> > > +#endif
> > >  }
> > >  
> > >  static vector bool char __ATTRS_o_ai
> > >  vec_vpkuhum(vector bool short __a, vector bool short __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector bool char)vec_perm(__a, __b, (vector unsigned
> > > char)
> > > +    (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
> > > +     0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
> > > +#else
> > >    return (vector bool char)vec_perm(__a, __b, (vector unsigned
> > >    char)
> > >      (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
> > >       0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
> > > +#endif
> > >  }
> > >  
> > >  /* vec_vpkuwum */
> > > @@ -4200,25 +4257,43 @@ vec_vpkuhum(vector bool short __a, vecto
> > >  static vector short __ATTRS_o_ai
> > >  vec_vpkuwum(vector int __a, vector int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector short)vec_perm(__a, __b, (vector unsigned char)
> > > +    (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
> > > +     0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
> > > +#else
> > >    return (vector short)vec_perm(__a, __b, (vector unsigned char)
> > >      (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
> > >       0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
> > > +#endif
> > >  }
> > >  
> > >  static vector unsigned short __ATTRS_o_ai
> > >  vec_vpkuwum(vector unsigned int __a, vector unsigned int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector unsigned short)vec_perm(__a, __b, (vector
> > > unsigned
> > > char)
> > > +    (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
> > > +     0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
> > > +#else
> > >    return (vector unsigned short)vec_perm(__a, __b, (vector
> > >    unsigned
> > >    char)
> > >      (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
> > >       0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
> > > +#endif
> > >  }
> > >  
> > >  static vector bool short __ATTRS_o_ai
> > >  vec_vpkuwum(vector bool int __a, vector bool int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector bool short)vec_perm(__a, __b, (vector unsigned
> > > char)
> > > +    (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
> > > +     0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
> > > +#else
> > >    return (vector bool short)vec_perm(__a, __b, (vector unsigned
> > >    char)
> > >      (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
> > >       0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
> > > +#endif
> > >  }
> > >  
> > >  /* vec_packpx */
> > > @@ -4226,7 +4301,11 @@ vec_vpkuwum(vector bool int __a, vector
> > >  static vector pixel __attribute__((__always_inline__))
> > >  vec_packpx(vector unsigned int __a, vector unsigned int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector pixel)__builtin_altivec_vpkpx(__b, __a);
> > > +#else
> > >    return (vector pixel)__builtin_altivec_vpkpx(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  /* vec_vpkpx */
> > > @@ -4234,7 +4313,11 @@ vec_packpx(vector unsigned int __a, vect
> > >  static vector pixel __attribute__((__always_inline__))
> > >  vec_vpkpx(vector unsigned int __a, vector unsigned int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return (vector pixel)__builtin_altivec_vpkpx(__b, __a);
> > > +#else
> > >    return (vector pixel)__builtin_altivec_vpkpx(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  /* vec_packs */
> > > @@ -4242,25 +4325,41 @@ vec_vpkpx(vector unsigned int __a, vecto
> > >  static vector signed char __ATTRS_o_ai
> > >  vec_packs(vector short __a, vector short __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkshss(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkshss(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  static vector unsigned char __ATTRS_o_ai
> > >  vec_packs(vector unsigned short __a, vector unsigned short __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkuhus(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkuhus(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  static vector signed short __ATTRS_o_ai
> > >  vec_packs(vector int __a, vector int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkswss(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkswss(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  static vector unsigned short __ATTRS_o_ai
> > >  vec_packs(vector unsigned int __a, vector unsigned int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkuwus(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkuwus(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  /* vec_vpkshss */
> > > @@ -4268,7 +4367,11 @@ vec_packs(vector unsigned int __a, vecto
> > >  static vector signed char __attribute__((__always_inline__))
> > >  vec_vpkshss(vector short __a, vector short __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkshss(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkshss(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  /* vec_vpkuhus */
> > > @@ -4276,7 +4379,11 @@ vec_vpkshss(vector short __a, vector sho
> > >  static vector unsigned char __attribute__((__always_inline__))
> > >  vec_vpkuhus(vector unsigned short __a, vector unsigned short
> > >  __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkuhus(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkuhus(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  /* vec_vpkswss */
> > > @@ -4284,7 +4391,11 @@ vec_vpkuhus(vector unsigned short __a, v
> > >  static vector signed short __attribute__((__always_inline__))
> > >  vec_vpkswss(vector int __a, vector int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkswss(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkswss(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  /* vec_vpkuwus */
> > > @@ -4292,7 +4403,11 @@ vec_vpkswss(vector int __a, vector int _
> > >  static vector unsigned short __attribute__((__always_inline__))
> > >  vec_vpkuwus(vector unsigned int __a, vector unsigned int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkuwus(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkuwus(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  /* vec_packsu */
> > > @@ -4300,25 +4415,41 @@ vec_vpkuwus(vector unsigned int __a, vec
> > >  static vector unsigned char __ATTRS_o_ai
> > >  vec_packsu(vector short __a, vector short __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkshus(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkshus(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  static vector unsigned char __ATTRS_o_ai
> > >  vec_packsu(vector unsigned short __a, vector unsigned short __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkuhus(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkuhus(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  static vector unsigned short __ATTRS_o_ai
> > >  vec_packsu(vector int __a, vector int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkswus(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkswus(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  static vector unsigned short __ATTRS_o_ai
> > >  vec_packsu(vector unsigned int __a, vector unsigned int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkuwus(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkuwus(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  /* vec_vpkshus */
> > > @@ -4326,13 +4457,21 @@ vec_packsu(vector unsigned int __a, vect
> > >  static vector unsigned char __ATTRS_o_ai
> > >  vec_vpkshus(vector short __a, vector short __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkshus(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkshus(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  static vector unsigned char __ATTRS_o_ai
> > >  vec_vpkshus(vector unsigned short __a, vector unsigned short
> > >  __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkuhus(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkuhus(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  /* vec_vpkswus */
> > > @@ -4340,13 +4479,21 @@ vec_vpkshus(vector unsigned short __a, v
> > >  static vector unsigned short __ATTRS_o_ai
> > >  vec_vpkswus(vector int __a, vector int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkswus(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkswus(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  static vector unsigned short __ATTRS_o_ai
> > >  vec_vpkswus(vector unsigned int __a, vector unsigned int __b)
> > >  {
> > > +#ifdef __LITTLE_ENDIAN__
> > > +  return __builtin_altivec_vpkuwus(__b, __a);
> > > +#else
> > >    return __builtin_altivec_vpkuwus(__a, __b);
> > > +#endif
> > >  }
> > >  
> > >  /* vec_perm */
> > > 
> > > 
> > > _______________________________________________
> > > cfe-commits mailing list
> > > cfe-commits at cs.uiuc.edu
> > > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
> > > 
> > 
> 
> 

-- 
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory



More information about the cfe-commits mailing list