[cfe-commits] r165816 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/ppc64-struct-onefloat.c

Fri Oct 12 12:26:17 PDT 2012

Author: wschmidt
Date: Fri Oct 12 14:26:17 2012
New Revision: 165816

URL: http://llvm.org/viewvc/llvm-project?rev=165816&view=rev
Log:
This patch addresses PR13948.

For 64-bit PowerPC SVR4, an aggregate containing only one
floating-point field (float, double, or long double) must be passed in
a register as though just that field were present.  This patch
addresses the issue during Clang code generation by specifying in the
ABIArgInfo for the argument that the underlying type is passed
directly in a register.  The included test case verifies flat and
nested structs for the three data types.

Added:
    cfe/trunk/test/CodeGen/ppc64-struct-onefloat.c
Modified:
    cfe/trunk/lib/CodeGen/TargetInfo.cpp

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=165816&r1=165815&r2=165816&view=diff
==============================================================================

--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Fri Oct 12 14:26:17 2012
@@ -2602,13 +2602,31 @@
 public:
   PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
 
-  // TODO: Could override computeInfo to model the ABI more completely if
-  // it would be helpful.  Example: We might remove the byVal flag from
-  // aggregate arguments that fit in a register to avoid pushing them to
-  // memory on function entry.  Note that this is a performance optimization,
-  // not a compliance issue.  In general we prefer to keep ABI details in
-  // the back end where possible, but modifying an argument flag seems like
-  // a good thing to do before invoking the back end.
+  // TODO: We can add more logic to computeInfo to improve performance.
+  // Example: For aggregate arguments that fit in a register, we could
+  // use getDirectInReg (as is done below for structs containing a single
+  // floating-point value) to avoid pushing them to memory on function
+  // entry.  This would require changing the logic in PPCISelLowering
+  // when lowering the arguments in the caller and parameters in the callee.
+  virtual void computeInfo(CGFunctionInfo &FI) const {
+    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+    for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
+         it != ie; ++it) {
+      // Default argument classification applies, with one exception (PR13948):
+      // an aggregate holding a single floating-point item (possibly nested)
+      // must be passed in a register, if one is available, as that bare item.
+      const Type *T = isSingleElementStruct(it->type, getContext());
+      if (T) {
+        const BuiltinType *BT = T->getAs<BuiltinType>();
+        if (BT && BT->isFloatingPoint()) {
+          QualType QT(T, 0);
+          it->info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT));
+          continue;
+        }
+      }
+      it->info = classifyArgumentType(it->type);
+    }
+  }
 
   virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, 
                                  QualType Ty,

Added: cfe/trunk/test/CodeGen/ppc64-struct-onefloat.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/ppc64-struct-onefloat.c?rev=165816&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/ppc64-struct-onefloat.c (added)
+++ cfe/trunk/test/CodeGen/ppc64-struct-onefloat.c Fri Oct 12 14:26:17 2012
@@ -0,0 +1,65 @@
+// REQUIRES: ppc64-registered-target
+// RUN: %clang_cc1 -O0 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+typedef struct s1 { float f; } Sf;
+typedef struct s2 { double d; } Sd;
+typedef struct s3 { long double ld; } Sld;
+typedef struct s4 { Sf fs; } SSf;
+typedef struct s5 { Sd ds; } SSd;
+typedef struct s6 { Sld lds; } SSld;
+
+void bar(Sf a, Sd b, Sld c, SSf d, SSd e, SSld f) {}  // empty body: only the argument coercion is verified
+
+// CHECK: define void @bar
+// CHECK:  %a = alloca %struct.s1, align 4
+// CHECK:  %b = alloca %struct.s2, align 8
+// CHECK:  %c = alloca %struct.s3, align 16
+// CHECK:  %d = alloca %struct.s4, align 4
+// CHECK:  %e = alloca %struct.s5, align 8
+// CHECK:  %f = alloca %struct.s6, align 16
+// CHECK:  %coerce.dive = getelementptr %struct.s1* %a, i32 0, i32 0
+// CHECK:  store float %a.coerce, float* %coerce.dive, align 1
+// CHECK:  %coerce.dive1 = getelementptr %struct.s2* %b, i32 0, i32 0
+// CHECK:  store double %b.coerce, double* %coerce.dive1, align 1
+// CHECK:  %coerce.dive2 = getelementptr %struct.s3* %c, i32 0, i32 0
+// CHECK:  store ppc_fp128 %c.coerce, ppc_fp128* %coerce.dive2, align 1
+// CHECK:  %coerce.dive3 = getelementptr %struct.s4* %d, i32 0, i32 0
+// CHECK:  %coerce.dive4 = getelementptr %struct.s1* %coerce.dive3, i32 0, i32 0
+// CHECK:  store float %d.coerce, float* %coerce.dive4, align 1
+// CHECK:  %coerce.dive5 = getelementptr %struct.s5* %e, i32 0, i32 0
+// CHECK:  %coerce.dive6 = getelementptr %struct.s2* %coerce.dive5, i32 0, i32 0
+// CHECK:  store double %e.coerce, double* %coerce.dive6, align 1
+// CHECK:  %coerce.dive7 = getelementptr %struct.s6* %f, i32 0, i32 0
+// CHECK:  %coerce.dive8 = getelementptr %struct.s3* %coerce.dive7, i32 0, i32 0
+// CHECK:  store ppc_fp128 %f.coerce, ppc_fp128* %coerce.dive8, align 1
+// CHECK:  ret void
+
+void foo(void) 
+{
+  Sf p1 = { 22.63f };
+  Sd p2 = { 19.47 };
+  Sld p3 = { -155.1l };
+  SSf p4 = { { 22.63f } };
+  SSd p5 = { { 19.47 } };
+  SSld p6 = { { -155.1l } };
+  bar(p1, p2, p3, p4, p5, p6);  // each struct is expected to be passed as its lone FP field, inreg
+}
+
+// CHECK: define void @foo
+// CHECK:  %coerce.dive = getelementptr %struct.s1* %p1, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load float* %coerce.dive, align 1
+// CHECK:  %coerce.dive1 = getelementptr %struct.s2* %p2, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load double* %coerce.dive1, align 1
+// CHECK:  %coerce.dive2 = getelementptr %struct.s3* %p3, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load ppc_fp128* %coerce.dive2, align 1
+// CHECK:  %coerce.dive3 = getelementptr %struct.s4* %p4, i32 0, i32 0
+// CHECK:  %coerce.dive4 = getelementptr %struct.s1* %coerce.dive3, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load float* %coerce.dive4, align 1
+// CHECK:  %coerce.dive5 = getelementptr %struct.s5* %p5, i32 0, i32 0
+// CHECK:  %coerce.dive6 = getelementptr %struct.s2* %coerce.dive5, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load double* %coerce.dive6, align 1
+// CHECK:  %coerce.dive7 = getelementptr %struct.s6* %p6, i32 0, i32 0
+// CHECK:  %coerce.dive8 = getelementptr %struct.s3* %coerce.dive7, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load ppc_fp128* %coerce.dive8, align 1
+// CHECK:  call void @bar(float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}}, ppc_fp128 inreg %{{[0-9]+}}, float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}}, ppc_fp128 inreg %{{[0-9]+}})
+// CHECK:  ret void