[cfe-commits] r134754 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/x86_64-arguments.c

Eli Friedman eli.friedman at gmail.com
Fri Jul 8 15:36:06 PDT 2011


On Fri, Jul 8, 2011 at 3:18 PM, Bruno Cardoso Lopes
<bruno.cardoso at gmail.com> wrote:
> Author: bruno
> Date: Fri Jul  8 17:18:40 2011
> New Revision: 134754
>
> URL: http://llvm.org/viewvc/llvm-project?rev=134754&view=rev
> Log:
> Add support for AVX 256-bit in the x86_64 ABI (as in the 0.99.5 draft)
>
> Modified:
>    cfe/trunk/lib/CodeGen/TargetInfo.cpp
>    cfe/trunk/test/CodeGen/x86_64-arguments.c

It looks like this change affects vector types like <8 x i32> as well;
was that intentional?  (I haven't looked closely at the revised ABI
document.)

It looks like in non-AVX mode, va_arg on <8 x float> is broken.  Please fix.

It isn't obvious to me looking at the code that {<4 x float>, <4 x
float>} is getting handled correctly; could you please add a test, and
fix it if it's broken?

-Eli

> Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=134754&r1=134753&r2=134754&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
> +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Fri Jul  8 17:18:40 2011
> @@ -820,6 +820,22 @@
>   /// should just return Memory for the aggregate).
>   static Class merge(Class Accum, Class Field);
>
> +  /// postMerge - Implement the X86_64 ABI post merging algorithm.
> +  ///
> +  /// Post merger cleanup, reduces a malformed Hi and Lo pair to
> +  /// final MEMORY or SSE classes when necessary.
> +  ///
> +  /// \param AggregateSize - The size of the current aggregate in
> +  /// the classification process.
> +  ///
> +  /// \param Lo - The classification for the parts of the type
> +  /// residing in the low word of the containing object.
> +  ///
> +  /// \param Hi - The classification for the parts of the type
> +  /// residing in the higher words of the containing object.
> +  ///
> +  void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;
> +
>   /// classify - Determine the x86_64 register classes in which the
>   /// given type T should be passed.
>   ///
> @@ -843,7 +859,7 @@
>   /// also be ComplexX87.
>   void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi) const;
>
> -  const llvm::Type *Get16ByteVectorType(QualType Ty) const;
> +  const llvm::Type *GetByteVectorType(QualType Ty) const;
>   const llvm::Type *GetSSETypeAtOffset(const llvm::Type *IRType,
>                                        unsigned IROffset, QualType SourceTy,
>                                        unsigned SourceOffset) const;
> @@ -956,6 +972,39 @@
>
>  }
>
> +void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo,
> +                              Class &Hi) const {
> +  // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
> +  //
> +  // (a) If one of the classes is Memory, the whole argument is passed in
> +  //     memory.
> +  //
> +  // (b) If X87UP is not preceded by X87, the whole argument is passed in
> +  //     memory.
> +  //
> +  // (c) If the size of the aggregate exceeds two eightbytes and the first
> +  //     eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole
> +  //     argument is passed in memory. NOTE: This is necessary to keep the
> +  //     ABI working for processors that don't support the __m256 type.
> +  //
> +  // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE.
> +  //
> +  // Some of these are enforced by the merging logic.  Others can arise
> +  // only with unions; for example:
> +  //   union { _Complex double; unsigned; }
> +  //
> +  // Note that clauses (b) and (c) were added in 0.98.
> +  //
> +  if (Hi == Memory)
> +    Lo = Memory;
> +  if (Hi == X87Up && Lo != X87 && honorsRevision0_98())
> +    Lo = Memory;
> +  if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp))
> +    Lo = Memory;
> +  if (Hi == SSEUp && Lo != SSE)
> +    Hi = SSE;
> +}
> +
>  X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) {
>   // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is
>   // classified recursively so that always two fields are
> @@ -1082,7 +1131,14 @@
>       // split.
>       if (OffsetBase && OffsetBase != 64)
>         Hi = Lo;
> -    } else if (Size == 128) {
> +    } else if (Size == 128 | Size == 256) {
> +      // Arguments of 256-bits are split into four eightbyte chunks. The
> +      // least significant one belongs to class SSE and all the others to class
> +      // SSEUP. The original Lo and Hi design considers that types can't be
> +      // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense.
> +      // This design isn't correct for 256-bits, but since there're no cases
> +      // where the upper parts would need to be inspected, avoid adding
> +      // complexity and just consider Hi to match the 64-256 part.
>       Lo = SSE;
>       Hi = SSEUp;
>     }
> @@ -1121,8 +1177,8 @@
>     uint64_t Size = getContext().getTypeSize(Ty);
>
>     // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
> -    // than two eightbytes, ..., it has class MEMORY.
> -    if (Size > 128)
> +    // than four eightbytes, ..., it has class MEMORY.
> +    if (Size > 256)
>       return;
>
>     // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned
> @@ -1146,9 +1202,7 @@
>         break;
>     }
>
> -    // Do post merger cleanup (see below). Only case we worry about is Memory.
> -    if (Hi == Memory)
> -      Lo = Memory;
> +    postMerge(Size, Lo, Hi);
>     assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification.");
>     return;
>   }
> @@ -1157,8 +1211,8 @@
>     uint64_t Size = getContext().getTypeSize(Ty);
>
>     // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
> -    // than two eightbytes, ..., it has class MEMORY.
> -    if (Size > 128)
> +    // than four eightbytes, ..., it has class MEMORY.
> +    if (Size > 256)
>       return;
>
>     // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial
> @@ -1257,31 +1311,7 @@
>         break;
>     }
>
> -    // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
> -    //
> -    // (a) If one of the classes is MEMORY, the whole argument is
> -    // passed in memory.
> -    //
> -    // (b) If X87UP is not preceded by X87, the whole argument is
> -    // passed in memory.
> -    //
> -    // (c) If the size of the aggregate exceeds two eightbytes and the first
> -    // eight-byte isn't SSE or any other eightbyte isn't SSEUP, the whole
> -    // argument is passed in memory.
> -    //
> -    // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE.
> -    //
> -    // Some of these are enforced by the merging logic.  Others can arise
> -    // only with unions; for example:
> -    //   union { _Complex double; unsigned; }
> -    //
> -    // Note that clauses (b) and (c) were added in 0.98.
> -    if (Hi == Memory)
> -      Lo = Memory;
> -    if (Hi == X87Up && Lo != X87 && honorsRevision0_98())
> -      Lo = Memory;
> -    if (Hi == SSEUp && Lo != SSE)
> -      Hi = SSE;
> +    postMerge(Size, Lo, Hi);
>   }
>  }
>
> @@ -1321,10 +1351,10 @@
>   return ABIArgInfo::getIndirect(Align);
>  }
>
> -/// Get16ByteVectorType - The ABI specifies that a value should be passed in an
> -/// full vector XMM register.  Pick an LLVM IR type that will be passed as a
> +/// GetByteVectorType - The ABI specifies that a value should be passed in a
> +/// full vector XMM/YMM register.  Pick an LLVM IR type that will be passed as a
>  /// vector register.
> -const llvm::Type *X86_64ABIInfo::Get16ByteVectorType(QualType Ty) const {
> +const llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
>   const llvm::Type *IRType = CGT.ConvertTypeRecursive(Ty);
>
>   // Wrapper structs that just contain vectors are passed just like vectors,
> @@ -1335,10 +1365,11 @@
>     STy = dyn_cast<llvm::StructType>(IRType);
>   }
>
> -  // If the preferred type is a 16-byte vector, prefer to pass it.
> +  // If the preferred type is a 16/32-byte vector, prefer to pass it.
>   if (const llvm::VectorType *VT = dyn_cast<llvm::VectorType>(IRType)){
>     const llvm::Type *EltTy = VT->getElementType();
> -    if (VT->getBitWidth() == 128 &&
> +    unsigned BitWidth = VT->getBitWidth();
> +    if ((BitWidth == 128 || BitWidth == 256) &&
>         (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
>          EltTy->isIntegerTy(8) || EltTy->isIntegerTy(16) ||
>          EltTy->isIntegerTy(32) || EltTy->isIntegerTy(64) ||
> @@ -1701,12 +1732,13 @@
>     break;
>
>     // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte
> -    // is passed in the upper half of the last used SSE register.
> +    // is passed in the next available eightbyte chunk of the last used
> +    // vector register.
>     //
>     // SSEUP should always be preceded by SSE, just widen.
>   case SSEUp:
>     assert(Lo == SSE && "Unexpected SSEUp classification.");
> -    ResType = Get16ByteVectorType(RetTy);
> +    ResType = GetByteVectorType(RetTy);
>     break;
>
>     // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is
> @@ -1846,7 +1878,7 @@
>     // register.  This only happens when 128-bit vectors are passed.
>   case SSEUp:
>     assert(Lo == SSE && "Unexpected SSEUp classification");
> -    ResType = Get16ByteVectorType(Ty);
> +    ResType = GetByteVectorType(Ty);
>     break;
>   }
>
>
> Modified: cfe/trunk/test/CodeGen/x86_64-arguments.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86_64-arguments.c?rev=134754&r1=134753&r2=134754&view=diff
> ==============================================================================
> --- cfe/trunk/test/CodeGen/x86_64-arguments.c (original)
> +++ cfe/trunk/test/CodeGen/x86_64-arguments.c Fri Jul  8 17:18:40 2011
> @@ -262,3 +262,18 @@
>  // CHECK: define double @f36(double %arg.coerce)
>  typedef unsigned v2i32 __attribute((__vector_size__(8)));
>  v2i32 f36(v2i32 arg) { return arg; }
> +
> +// CHECK: declare void @f38(<8 x float>)
> +// CHECK: declare void @f37(<8 x float>)
> +typedef float __m256 __attribute__ ((__vector_size__ (32)));
> +typedef struct {
> +  __m256 m;
> +} s256;
> +
> +s256 x38;
> +__m256 x37;
> +
> +void f38(s256 x);
> +void f37(__m256 x);
> +void f39() { f38(x38); f37(x37); }
> +
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>




More information about the cfe-commits mailing list