[cfe-commits] r152462 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/complex-indirect.c test/CodeGen/x86_64-arguments.c

Fri Mar 9 17:03:58 PST 2012

Author: ddunbar
Date: Fri Mar  9 19:03:58 2012
New Revision: 152462

URL: http://llvm.org/viewvc/llvm-project?rev=152462&view=rev
Log:
IRgen/ABI/x86_64: Avoid passing small structs using byval sometimes.

 - We do this when it is easy to determine that the backend will pass them on
   the stack properly by itself.

Currently LLVM codegen is really bad in some cases with byval, for example, on
the test case here (which is derived from Sema code, which likes to pass
SourceLocations around)::

  struct s47 { unsigned a; };
  void f47(int,int,int,int,int,int,struct s47);
  void test47(int a, struct s47 b) { f47(a, a, a, a, a, a, b); }

we used to emit code like this::

  ...
  movl	%esi, -8(%rbp)
  movl	-8(%rbp), %ecx
  movl	%ecx, (%rsp)
  ...

to handle moving the struct onto the stack, which is just appalling.

Now we generate::

  movl	%esi, (%rsp)

which seems better, no?

Modified:
    cfe/trunk/lib/CodeGen/TargetInfo.cpp
    cfe/trunk/test/CodeGen/complex-indirect.c
    cfe/trunk/test/CodeGen/x86_64-arguments.c

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=152462&r1=152461&r2=152462&view=diff
==============================================================================

--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Fri Mar  9 19:03:58 2012
@@ -924,11 +924,15 @@
 
   /// getIndirectResult - Give a source type \arg Ty, return a suitable result
   /// such that the argument will be passed in memory.
-  ABIArgInfo getIndirectResult(QualType Ty) const;
+  ///
+  /// \param freeIntRegs - The number of free integer registers remaining
+  /// available.
+  ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
 
   ABIArgInfo classifyArgumentType(QualType Ty,
+                                  unsigned freeIntRegs,
                                   unsigned &neededInt,
                                   unsigned &neededSSE) const;
 
@@ -951,7 +955,8 @@
 
   bool isPassedUsingAVXType(QualType type) const {
     unsigned neededInt, neededSSE;
-    ABIArgInfo info = classifyArgumentType(type, neededInt, neededSSE);
+    // The freeIntRegs argument doesn't matter here.
+    ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE);
     if (info.isDirect()) {
       llvm::Type *ty = info.getCoerceToType();
       if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
@@ -1441,9 +1446,16 @@
   return false;
 }
 
-ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty) const {
+ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
+                                            unsigned freeIntRegs) const {
   // If this is a scalar LLVM value then assume LLVM will pass it in the right
   // place naturally.
+  //
+  // This assumption is optimistic, as there could be free registers available
+  // when we need to pass this argument in memory, and LLVM could try to pass
+  // the argument in the free register. This does not seem to happen currently,
+  // but this code would be much safer if we could mark the argument with
+  // 'onstack'. See PR12193.
   if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty)) {
     // Treat an enum type as its underlying type.
     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
@@ -1459,6 +1471,38 @@
   // Compute the byval alignment. We specify the alignment of the byval in all
   // cases so that the mid-level optimizer knows the alignment of the byval.
   unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U);
+
+  // Attempt to avoid passing indirect results using byval when possible. This
+  // is important for good codegen.
+  //
+  // We do this by coercing the value into a scalar type which the backend can
+  // handle naturally (i.e., without using byval).
+  //
+  // For simplicity, we currently only do this when we have exhausted all of the
+  // free integer registers. Doing this when there are free integer registers
+  // would require more care, as we would have to ensure that the coerced value
+  // did not claim the unused register. That would require either reording the
+  // arguments to the function (so that any subsequent inreg values came first),
+  // or only doing this optimization when there were no following arguments that
+  // might be inreg.
+  //
+  // We currently expect it to be rare (particularly in well written code) for
+  // arguments to be passed on the stack when there are still free integer
+  // registers available (this would typically imply large structs being passed
+  // by value), so this seems like a fair tradeoff for now.
+  //
+  // We can revisit this if the backend grows support for 'onstack' parameter
+  // attributes. See PR12193.
+  if (freeIntRegs == 0) {
+    uint64_t Size = getContext().getTypeSize(Ty);
+
+    // If this type fits in an eightbyte, coerce it into the matching integral
+    // type, which will end up on the stack (with alignment 8).
+    if (Align == 8 && Size <= 64)
+      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
+                                                          Size));
+  }
+
   return ABIArgInfo::getIndirect(Align);
 }
 
@@ -1874,8 +1918,10 @@
   return ABIArgInfo::getDirect(ResType);
 }
 
-ABIArgInfo X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned &neededInt,
-                                               unsigned &neededSSE) const {
+ABIArgInfo X86_64ABIInfo::classifyArgumentType(
+  QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE)
+  const
+{
   X86_64ABIInfo::Class Lo, Hi;
   classify(Ty, 0, Lo, Hi);
 
@@ -1907,7 +1953,7 @@
   case ComplexX87:
     if (isRecordWithNonTrivialDestructorOrCopyConstructor(Ty))
       ++neededInt;
-    return getIndirectResult(Ty);
+    return getIndirectResult(Ty, freeIntRegs);
 
   case SSEUp:
   case X87Up:
@@ -2015,7 +2061,8 @@
   for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
        it != ie; ++it) {
     unsigned neededInt, neededSSE;
-    it->info = classifyArgumentType(it->type, neededInt, neededSSE);
+    it->info = classifyArgumentType(it->type, freeIntRegs, neededInt,
+                                    neededSSE);
 
     // AMD64-ABI 3.2.3p3: If there are no registers available for any
     // eightbyte of an argument, the whole argument is passed on the
@@ -2025,7 +2072,7 @@
       freeIntRegs -= neededInt;
       freeSSERegs -= neededSSE;
     } else {
-      it->info = getIndirectResult(it->type);
+      it->info = getIndirectResult(it->type, freeIntRegs);
     }
   }
 }
@@ -2091,7 +2138,7 @@
   unsigned neededInt, neededSSE;
 
   Ty = CGF.getContext().getCanonicalType(Ty);
-  ABIArgInfo AI = classifyArgumentType(Ty, neededInt, neededSSE);
+  ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE);
 
   // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
   // in the registers. If not go to step 7.

Modified: cfe/trunk/test/CodeGen/complex-indirect.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/complex-indirect.c?rev=152462&r1=152461&r2=152462&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/complex-indirect.c (original)
+++ cfe/trunk/test/CodeGen/complex-indirect.c Fri Mar  9 19:03:58 2012
@@ -1,10 +1,12 @@
-// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin10 | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o %t -triple=x86_64-apple-darwin10
+// RUN: FileCheck < %t %s
 
-// Make sure this doesn't crash, and that we don't generate a byval alloca
-// with insufficient alignment.
+// Make sure this doesn't crash. We used to generate a byval here and wanted to
+// verify a valid alignment, but we now realize we can use an i16 and let the
+// backend guarantee the alignment.
 
 void a(int,int,int,int,int,int,__complex__ char);
 void b(__complex__ char *y) { a(0,0,0,0,0,0,*y); }
 // CHECK: define void @b
 // CHECK: alloca { i8, i8 }*, align 8
-// CHECK: call void @a(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, { i8, i8 }* byval align 8
+// CHECK: call void @a(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i16 {{.*}})

Modified: cfe/trunk/test/CodeGen/x86_64-arguments.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86_64-arguments.c?rev=152462&r1=152461&r2=152462&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/x86_64-arguments.c (original)
+++ cfe/trunk/test/CodeGen/x86_64-arguments.c Fri Mar  9 19:03:58 2012
@@ -345,3 +345,12 @@
 typedef float v46 __attribute((vector_size(8)));
 void f46(v46,v46,v46,v46,v46,v46,v46,v46,v46,v46);
 void test46() { v46 x = {1,2}; f46(x,x,x,x,x,x,x,x,x,x); }
+
+// Check that we pass the struct below without using byval, which helps out
+// codegen.
+//
+// CHECK: @test47
+// CHECK: call void @f47(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+struct s47 { unsigned a; };
+void f47(int,int,int,int,int,int,struct s47);
+void test47(int a, struct s47 b) { f47(a, a, a, a, a, a, b); }