[cfe-commits] r107099 - in /cfe/trunk: lib/CodeGen/CGCall.cpp test/CodeGen/x86_64-arguments.c test/CodeGenCXX/x86_64-arguments.cpp

Chris Lattner sabre at nondot.org
Mon Jun 28 16:44:11 PDT 2010


Author: lattner
Date: Mon Jun 28 18:44:11 2010
New Revision: 107099

URL: http://llvm.org/viewvc/llvm-project?rev=107099&view=rev
Log:
Change CGCall to handle the "coerce" case where the coerce-to type
is an FCA to pass each of the elements as individual scalars.  This
produces code that fast isel is less likely to reject and that is
easier on the optimizers.

For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
  return D.NumDecls+D.Y;
}

to:
%struct.DeclGroup = type { i64, i64 }

define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
  %D = alloca %struct.DeclGroup, align 8          ; <%struct.DeclGroup*> [#uses=3]
  store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
  %tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
  %tmp1 = load i64* %tmp                          ; <i64> [#uses=1]
  %tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
  %tmp3 = load i64* %tmp2                         ; <i64> [#uses=1]
  %add = add nsw i64 %tmp1, %tmp3                 ; <i64> [#uses=1]
  ret i64 %add
}

Now we get:

%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }

define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
  %D = alloca %struct.DeclGroup, align 8          ; <%struct.DeclGroup*> [#uses=3]
  %2 = insertvalue %0 undef, i64 %0, 0            ; <%0> [#uses=1]
  %3 = insertvalue %0 %2, i64 %1, 1               ; <%0> [#uses=1]
  %4 = bitcast %struct.DeclGroup* %D to %0*       ; <%0*> [#uses=1]
  store %0 %3, %0* %4, align 1
  %tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
  %tmp1 = load i64* %tmp                          ; <i64> [#uses=1]
  %tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
  %tmp3 = load i8** %tmp2                         ; <i8*> [#uses=1]
  %add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
  ret i8* %add.ptr
}

Elimination of the FCA inside the function is still to come.


Modified:
    cfe/trunk/lib/CodeGen/CGCall.cpp
    cfe/trunk/test/CodeGen/x86_64-arguments.c
    cfe/trunk/test/CodeGenCXX/x86_64-arguments.cpp

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=107099&r1=107098&r2=107099&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Mon Jun 28 18:44:11 2010
@@ -240,7 +240,8 @@
     return *FI;
 
   // Construct the function info.
-  FI = new CGFunctionInfo(CC, Info.getNoReturn(), Info.getRegParm(), ResTy, ArgTys);
+  FI = new CGFunctionInfo(CC, Info.getNoReturn(), Info.getRegParm(), ResTy,
+                          ArgTys);
   FunctionInfos.InsertNode(FI, InsertPos);
 
   // Compute ABI information.
@@ -259,6 +260,8 @@
     NoReturn(_NoReturn), RegParm(_RegParm)
 {
   NumArgs = ArgTys.size();
+  
+  // FIXME: Coallocate with the CGFunctionInfo object.
   Args = new ArgInfo[1 + NumArgs];
   Args[0].type = ResTy;
   for (unsigned i = 0; i < NumArgs; ++i)
@@ -593,9 +596,19 @@
     case ABIArgInfo::Ignore:
       break;
 
-    case ABIArgInfo::Coerce:
-      ArgTys.push_back(AI.getCoerceToType());
+    case ABIArgInfo::Coerce: {
+      // If the coerce-to type is a first class aggregate, flatten it.  Either
+      // way is semantically identical, but fast-isel and the optimizer
+      // generally likes scalar values better than FCAs.
+      const llvm::Type *ArgTy = AI.getCoerceToType();
+      if (const llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgTy)) {
+        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+          ArgTys.push_back(STy->getElementType(i));
+      } else {
+        ArgTys.push_back(ArgTy);
+      }
       break;
+    }
 
     case ABIArgInfo::Indirect: {
       // indirect arguments are always on the stack, which is addr space #0.
@@ -713,7 +726,12 @@
 
     switch (AI.getKind()) {
     case ABIArgInfo::Coerce:
-      break;
+      if (const llvm::StructType *STy =
+          dyn_cast<llvm::StructType>(AI.getCoerceToType()))
+        Index += STy->getNumElements();
+      else
+        ++Index;
+      continue;  // Skip index increment.
 
     case ABIArgInfo::Indirect:
       if (AI.getIndirectByVal())
@@ -806,7 +824,7 @@
 
     switch (ArgI.getKind()) {
     case ABIArgInfo::Indirect: {
-      llvm::Value* V = AI;
+      llvm::Value *V = AI;
       if (hasAggregateLLVMType(Ty)) {
         // Do nothing, aggregates and complex variables are accessed by
         // reference.
@@ -826,7 +844,7 @@
     case ABIArgInfo::Extend:
     case ABIArgInfo::Direct: {
       assert(AI != Fn->arg_end() && "Argument mismatch!");
-      llvm::Value* V = AI;
+      llvm::Value *V = AI;
       if (hasAggregateLLVMType(Ty)) {
         // Create a temporary alloca to hold the argument; the rest of
         // codegen expects to access aggregates & complex values by
@@ -876,12 +894,29 @@
       continue;
 
     case ABIArgInfo::Coerce: {
-      assert(AI != Fn->arg_end() && "Argument mismatch!");
+      // If the coerce-to type is a first class aggregate, we flatten it and
+      // pass the elements. Either way is semantically identical, but fast-isel
+      // and the optimizer generally likes scalar values better than FCAs.
+      llvm::Value *FormalArg;
+      if (const llvm::StructType *STy =
+            dyn_cast<llvm::StructType>(ArgI.getCoerceToType())) {
+        // Reconstruct the FCA here.
+        // FIXME: If we have a direct match, do nice gep/store series.
+        FormalArg = llvm::UndefValue::get(STy);
+        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+          assert(AI != Fn->arg_end() && "Argument mismatch!");
+          FormalArg = Builder.CreateInsertValue(FormalArg, AI++, i);
+        }
+      } else {
+        assert(AI != Fn->arg_end() && "Argument mismatch!");
+        FormalArg = AI++;
+      }
+      
       // FIXME: This is very wasteful; EmitParmDecl is just going to drop the
       // result in a new alloca anyway, so we could just store into that
       // directly if we broke the abstraction down more.
       llvm::Value *V = CreateMemTemp(Ty, "coerce");
-      CreateCoercedStore(AI, V, /*DestIsVolatile=*/false, *this);
+      CreateCoercedStore(FormalArg, V, /*DestIsVolatile=*/false, *this);
       // Match to what EmitParmDecl is expecting for this type.
       if (!CodeGenFunction::hasAggregateLLVMType(Ty)) {
         V = EmitLoadOfScalar(V, false, Ty);
@@ -892,7 +927,7 @@
         }
       }
       EmitParmDecl(*Arg, V);
-      break;
+      continue;  // Skip ++AI increment, already done.
     }
     }
 
@@ -1080,8 +1115,22 @@
         StoreComplexToAddr(RV.getComplexVal(), SrcPtr, false);
       } else
         SrcPtr = RV.getAggregateAddr();
-      Args.push_back(CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(),
-                                       *this));
+      
+      llvm::Value *SrcVal = 
+        CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), *this);
+      
+      // If the coerce-to type is a first class aggregate, we flatten it and
+      // pass the elements. Either way is semantically identical, but fast-isel
+      // and the optimizer generally likes scalar values better than FCAs.
+      if (const llvm::StructType *STy =
+            dyn_cast<llvm::StructType>(SrcVal->getType())) {
+        // Extract the elements of the value to pass in.
+        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+          Args.push_back(Builder.CreateExtractValue(SrcVal, i));
+      } else {
+        Args.push_back(SrcVal);
+      }
+      
       break;
     }
 

Modified: cfe/trunk/test/CodeGen/x86_64-arguments.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86_64-arguments.c?rev=107099&r1=107098&r2=107099&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/x86_64-arguments.c (original)
+++ cfe/trunk/test/CodeGen/x86_64-arguments.c Mon Jun 28 18:44:11 2010
@@ -45,7 +45,7 @@
 // Test merging/passing of upper eightbyte with X87 class.
 //
 // CHECK: define %0 @f8_1()
-// CHECK: define void @f8_2(%0)
+// CHECK: define void @f8_2(i64, double)
 union u8 {
   long double a;
   int b;

Modified: cfe/trunk/test/CodeGenCXX/x86_64-arguments.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/x86_64-arguments.cpp?rev=107099&r1=107098&r2=107099&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/x86_64-arguments.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/x86_64-arguments.cpp Mon Jun 28 18:44:11 2010
@@ -6,19 +6,19 @@
 // Basic base class test.
 struct f0_s0 { unsigned a; };
 struct f0_s1 : public f0_s0 { void *b; };
-// CHECK: define void @_Z2f05f0_s1([[i64_i64_ty]])
+// CHECK: define void @_Z2f05f0_s1(i64, i64)
 void f0(f0_s1 a0) { }
 
 // Check with two eight-bytes in base class.
 struct f1_s0 { unsigned a; unsigned b; float c; };
 struct f1_s1 : public f1_s0 { float d;};
-// CHECK: define void @_Z2f15f1_s1([[i64_double_ty]])
+// CHECK: define void @_Z2f15f1_s1(i64, double)
 void f1(f1_s1 a0) { }
 
 // Check with two eight-bytes in base class and merge.
 struct f2_s0 { unsigned a; unsigned b; float c; };
 struct f2_s1 : public f2_s0 { char d;};
-// CHECK: define void @_Z2f25f2_s1([[i64_i64_ty]])
+// CHECK: define void @_Z2f25f2_s1(i64, i64)
 void f2(f2_s1 a0) { }
 
 // PR5831
@@ -27,7 +27,7 @@
 void f3(struct s3_1 x) {}
 
 // CHECK: define i64 @_Z4f4_0M2s4i(i64)
-// CHECK: define [[i64_i64_ty]] @_Z4f4_1M2s4FivE([[i64_i64_ty]])
+// CHECK: define [[i64_i64_ty]] @_Z4f4_1M2s4FivE(i64, i64)
 struct s4 {};
 typedef int s4::* s4_mdp;
 typedef int (s4::*s4_mfp)();





More information about the cfe-commits mailing list