[cfe-commits] r107048 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/x86_64-arguments.c test/CodeGenCXX/alloca-align.cpp test/CodeGenCXX/virtual-functions-incomplete-types.cpp

Mon Jun 28 12:56:59 PDT 2010

Author: lattner
Date: Mon Jun 28 14:56:59 2010
New Revision: 107048

URL: http://llvm.org/viewvc/llvm-project?rev=107048&view=rev
Log:
X86-64:
pass/return structs of float/int as float/i32 instead of double/i64
to make the code generated for the ABI cleaner.  Passing in the low part
of a double is the same as passing in a float.

For example, we now compile:

struct DeclGroup { float NumDecls; };
float foo(DeclGroup D);
void bar(DeclGroup *D) {
 foo(*D);
}

into:

%struct.DeclGroup = type { float }

define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
entry:
  %D.addr = alloca %struct.DeclGroup*, align 8    ; <%struct.DeclGroup**> [#uses=2]
  %agg.tmp = alloca %struct.DeclGroup, align 4    ; <%struct.DeclGroup*> [#uses=2]
  store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
  %tmp = load %struct.DeclGroup** %D.addr         ; <%struct.DeclGroup*> [#uses=1]
  %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
  %tmp2 = bitcast %struct.DeclGroup* %tmp to i8*  ; <i8*> [#uses=1]
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
  %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
  %0 = load float* %coerce.dive, align 1          ; <float> [#uses=1]
  %call = call float @_Z3foo9DeclGroup(float %0)  ; <float> [#uses=0]
  ret void
}

instead of:

%struct.DeclGroup = type { float }

define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
entry:
  %D.addr = alloca %struct.DeclGroup*, align 8    ; <%struct.DeclGroup**> [#uses=2]
  %agg.tmp = alloca %struct.DeclGroup, align 4    ; <%struct.DeclGroup*> [#uses=2]
  %tmp3 = alloca double                           ; <double*> [#uses=2]
  store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
  %tmp = load %struct.DeclGroup** %D.addr         ; <%struct.DeclGroup*> [#uses=1]
  %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
  %tmp2 = bitcast %struct.DeclGroup* %tmp to i8*  ; <i8*> [#uses=1]
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
  %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
  %0 = bitcast double* %tmp3 to float*            ; <float*> [#uses=1]
  %1 = load float* %coerce.dive                   ; <float> [#uses=1]
  store float %1, float* %0, align 1
  %2 = load double* %tmp3                         ; <double> [#uses=1]
  %call = call float @_Z3foo9DeclGroup(double %2) ; <float> [#uses=0]
  ret void
}

The new IR compiles to this machine code (at -O0):

__Z3barP9DeclGroup:
	subq	$24, %rsp
	movq	%rdi, 16(%rsp)
	movq	16(%rsp), %rdi
	leaq	8(%rsp), %rax
	movl	(%rdi), %ecx
	movl	%ecx, (%rax)
	movss	8(%rsp), %xmm0
	callq	__Z3foo9DeclGroup
	addq	$24, %rsp
	ret

vs this before:

__Z3barP9DeclGroup:
	subq	$24, %rsp
	movq	%rdi, 16(%rsp)
	movq	16(%rsp), %rdi
	leaq	8(%rsp), %rax
	movl	(%rdi), %ecx
	movl	%ecx, (%rax)
	movss	8(%rsp), %xmm0
	movss	%xmm0, (%rsp)
	movsd	(%rsp), %xmm0
	callq	__Z3foo9DeclGroup
	addq	$24, %rsp
	ret

At -O3, it is the difference between this now:

__Z3barP9DeclGroup:
	movss	(%rdi), %xmm0
	jmp	__Z3foo9DeclGroup  # TAILCALL

vs this before:

__Z3barP9DeclGroup:
	movl	(%rdi), %eax
	movd	%rax, %xmm0
	jmp	__Z3foo9DeclGroup  # TAILCALL


Modified:
    cfe/trunk/lib/CodeGen/TargetInfo.cpp
    cfe/trunk/test/CodeGen/x86_64-arguments.c
    cfe/trunk/test/CodeGenCXX/alloca-align.cpp
    cfe/trunk/test/CodeGenCXX/virtual-functions-incomplete-types.cpp

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=107048&r1=107047&r2=107048&view=diff
==============================================================================

--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Mon Jun 28 14:56:59 2010
@@ -1086,6 +1086,13 @@
     if (Ty->isIntegralOrEnumerationType() || Ty->hasPointerRepresentation())
       return (Ty->isPromotableIntegerType() ?
               ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+    
+    // If this is a 32-bit structure that is passed as an int64, then it will be
+    // passed in the low 32-bits of a 64-bit GPR, which is the same as how an
+    // i32 is passed.  Coerce to a i32 instead of a i64.
+    if (Context.getTypeSizeInChars(Ty).getQuantity() == 4)
+      CoerceTo = llvm::Type::getInt32Ty(CoerceTo->getContext());
+    
   } else if (CoerceTo->isDoubleTy()) {
     assert(Ty.isCanonical() && "should always have a canonical type here");
     assert(!Ty.hasQualifiers() && "should never have a qualified type here");
@@ -1094,6 +1101,11 @@
     if (Ty == Context.FloatTy || Ty == Context.DoubleTy)
       return ABIArgInfo::getDirect();
 
+    // If this is a 32-bit structure that is passed as a double, then it will be
+    // passed in the low 32-bits of the XMM register, which is the same as how a
+    // float is passed.  Coerce to a float instead of a double.
+    if (Context.getTypeSizeInChars(Ty).getQuantity() == 4)
+      CoerceTo = llvm::Type::getFloatTy(CoerceTo->getContext());
   }
 
   return ABIArgInfo::getCoerce(CoerceTo);

Modified: cfe/trunk/test/CodeGen/x86_64-arguments.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86_64-arguments.c?rev=107048&r1=107047&r2=107048&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/x86_64-arguments.c (original)
+++ cfe/trunk/test/CodeGen/x86_64-arguments.c Mon Jun 28 14:56:59 2010
@@ -92,9 +92,10 @@
 void f17(float a, float b, float c, float d, float e, float f, float g, float h,
          long double X) {}
 
-// Check for valid coercion.
-// CHECK: [[f18_t1:%.*]] = trunc i64 {{.*}} to i32
-// CHECK: store i32 [[f18_t1]], i32* 
+// Check for valid coercion.  The struct should be passed/returned as i32, not
+// as i64 for better code quality.
+// rdar://8135035
+// CHECK: define void @f18(i32 %a, i32) 
 struct f18_s0 { int f0; };
 void f18(int a, struct f18_s0 f18_arg1) { while (1) {} }
 

Modified: cfe/trunk/test/CodeGenCXX/alloca-align.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/alloca-align.cpp?rev=107048&r1=107047&r2=107048&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/alloca-align.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/alloca-align.cpp Mon Jun 28 14:56:59 2010
@@ -18,7 +18,7 @@
   (void) (struct s0) { 0, 0, 0, 0 };
 }
 
-// CHECK: define i64 @f2
+// CHECK: define i32 @f2
 // CHECK: alloca %struct.s1, align 2
 struct s1 { short x; short y; };
 extern "C" struct s1 f2(int a, struct s1 *x, struct s1 *y) {

Modified: cfe/trunk/test/CodeGenCXX/virtual-functions-incomplete-types.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/virtual-functions-incomplete-types.cpp?rev=107048&r1=107047&r2=107048&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/virtual-functions-incomplete-types.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/virtual-functions-incomplete-types.cpp Mon Jun 28 14:56:59 2010
@@ -9,7 +9,7 @@
 
 void B::f() { }
 
-// CHECK: define i64 @_ZN1D1gEv(%struct.B* %this)
+// CHECK: define i32 @_ZN1D1gEv(%struct.B* %this)
 // CHECK: declare void @_ZN1B1gEv()
 
 struct C;