[cfe-commits] r172438 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/ppc64-varargs-complex.c

Mon Jan 14 09:45:36 PST 2013

Author: wschmidt
Date: Mon Jan 14 11:45:36 2013
New Revision: 172438

URL: http://llvm.org/viewvc/llvm-project?rev=172438&view=rev
Log:
This patch addresses varargs processing for small complex types under
the 64-bit PowerPC ELF ABI.

The ABI requires that the real and imaginary parts of a complex argument
each occupy their own doubleword.  Arguments smaller than 8 bytes are
right-adjusted within the doubleword.

Clang expects EmitVAARG() to return a pointer to a structure in which
the real and imaginary parts are packed adjacently in memory.  To accomplish
this, we generate code to load the code appropriately from the varargs
location and pack the values into a temporary variable in the form Clang
expects, returning a pointer to that structure.

The test case demonstrates correct code generation for all "small" complex
types on PPC64:  int, short, char, and float.

Added:
    cfe/trunk/test/CodeGen/ppc64-varargs-complex.c
Modified:
    cfe/trunk/lib/CodeGen/TargetInfo.cpp

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=172438&r1=172437&r2=172438&view=diff
==============================================================================

--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Mon Jan 14 11:45:36 2013
@@ -2825,14 +2825,52 @@
   llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap");
   llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
 
-  // Update the va_list pointer.
+  // Update the va_list pointer.  The pointer should be bumped by the
+  // size of the object.  We can trust getTypeSize() except for a complex
+  // type whose base type is smaller than a doubleword.  For these, the
+  // size of the object is 16 bytes; see below for further explanation.
   unsigned SizeInBytes = CGF.getContext().getTypeSize(Ty) / 8;
+  QualType BaseTy;
+  unsigned CplxBaseSize = 0;
+
+  if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
+    BaseTy = CTy->getElementType();
+    CplxBaseSize = CGF.getContext().getTypeSize(BaseTy) / 8;
+    if (CplxBaseSize < 8)
+      SizeInBytes = 16;
+  }
+
   unsigned Offset = llvm::RoundUpToAlignment(SizeInBytes, 8);
   llvm::Value *NextAddr =
     Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int64Ty, Offset),
                       "ap.next");
   Builder.CreateStore(NextAddr, VAListAddrAsBPP);
 
+  // If we have a complex type and the base type is smaller than 8 bytes,
+  // the ABI calls for the real and imaginary parts to be right-adjusted
+  // in separate doublewords.  However, Clang expects us to produce a
+  // pointer to a structure with the two parts packed tightly.  So generate
+  // loads of the real and imaginary parts relative to the va_list pointer,
+  // and store them to a temporary structure.
+  if (CplxBaseSize && CplxBaseSize < 8) {
+    llvm::Value *RealAddr = Builder.CreatePtrToInt(Addr, CGF.Int64Ty);
+    llvm::Value *ImagAddr = RealAddr;
+    RealAddr = Builder.CreateAdd(RealAddr, Builder.getInt64(8 - CplxBaseSize));
+    ImagAddr = Builder.CreateAdd(ImagAddr, Builder.getInt64(16 - CplxBaseSize));
+    llvm::Type *PBaseTy = llvm::PointerType::getUnqual(CGF.ConvertType(BaseTy));
+    RealAddr = Builder.CreateIntToPtr(RealAddr, PBaseTy);
+    ImagAddr = Builder.CreateIntToPtr(ImagAddr, PBaseTy);
+    llvm::Value *Real = Builder.CreateLoad(RealAddr, false, ".vareal");
+    llvm::Value *Imag = Builder.CreateLoad(ImagAddr, false, ".vaimag");
+    llvm::Value *Ptr = CGF.CreateTempAlloca(CGT.ConvertTypeForMem(Ty),
+                                            "vacplx");
+    llvm::Value *RealPtr = Builder.CreateStructGEP(Ptr, 0, ".real");
+    llvm::Value *ImagPtr = Builder.CreateStructGEP(Ptr, 1, ".imag");
+    Builder.CreateStore(Real, RealPtr, false);
+    Builder.CreateStore(Imag, ImagPtr, false);
+    return Ptr;
+  }
+
   // If the argument is smaller than 8 bytes, it is right-adjusted in
   // its doubleword slot.  Adjust the pointer to pick it up from the
   // correct offset.

Added: cfe/trunk/test/CodeGen/ppc64-varargs-complex.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/ppc64-varargs-complex.c?rev=172438&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/ppc64-varargs-complex.c (added)
+++ cfe/trunk/test/CodeGen/ppc64-varargs-complex.c Mon Jan 14 11:45:36 2013
@@ -0,0 +1,73 @@
+// REQUIRES: ppc64-registered-target
+// RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+#include <stdarg.h>
+
+void testva (int n, ...)
+{
+  va_list ap;
+
+  _Complex int i   = va_arg(ap, _Complex int);
+  // CHECK: %[[VAR40:[A-Za-z0-9.]+]] = load i8** %[[VAR100:[A-Za-z0-9.]+]]
+  // CHECK-NEXT: %[[VAR41:[A-Za-z0-9.]+]] = getelementptr i8* %[[VAR40]], i64 16
+  // CHECK-NEXT: store i8* %[[VAR41]], i8** %[[VAR100]]
+  // CHECK-NEXT: %[[VAR1:[A-Za-z0-9.]+]] = ptrtoint i8* %[[VAR40]] to i64
+  // CHECK-NEXT: %[[VAR2:[A-Za-z0-9.]+]] = add i64 %[[VAR1]], 4
+  // CHECK-NEXT: %[[VAR3:[A-Za-z0-9.]+]] = add i64 %[[VAR1]], 12
+  // CHECK-NEXT: %[[VAR4:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR2]] to i32*
+  // CHECK-NEXT: %[[VAR5:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR3]] to i32*
+  // CHECK-NEXT: %[[VAR6:[A-Za-z0-9.]+]] = load i32* %[[VAR4]]
+  // CHECK-NEXT: %[[VAR7:[A-Za-z0-9.]+]] = load i32* %[[VAR5]]
+  // CHECK-NEXT: %[[VAR8:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }* %[[VAR0:[A-Za-z0-9.]+]], i32 0, i32 0
+  // CHECK-NEXT: %[[VAR9:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }* %[[VAR0]], i32 0, i32 1
+  // CHECK-NEXT: store i32 %[[VAR6]], i32* %[[VAR8]]
+  // CHECK-NEXT: store i32 %[[VAR7]], i32* %[[VAR9]]
+
+  _Complex short s = va_arg(ap, _Complex short);
+  // CHECK: %[[VAR50:[A-Za-z0-9.]+]] = load i8** %[[VAR100:[A-Za-z0-9.]+]]
+  // CHECK-NEXT: %[[VAR51:[A-Za-z0-9.]+]] = getelementptr i8* %[[VAR50]], i64 16
+  // CHECK-NEXT: store i8* %[[VAR51]], i8** %[[VAR100]]
+  // CHECK: %[[VAR11:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
+  // CHECK-NEXT: %[[VAR12:[A-Za-z0-9.]+]] = add i64 %[[VAR11]], 6
+  // CHECK-NEXT: %[[VAR13:[A-Za-z0-9.]+]] = add i64 %[[VAR11]], 14
+  // CHECK-NEXT: %[[VAR14:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR12]] to i16*
+  // CHECK-NEXT: %[[VAR15:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR13]] to i16*
+  // CHECK-NEXT: %[[VAR16:[A-Za-z0-9.]+]] = load i16* %[[VAR14]]
+  // CHECK-NEXT: %[[VAR17:[A-Za-z0-9.]+]] = load i16* %[[VAR15]]
+  // CHECK-NEXT: %[[VAR18:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }* %[[VAR10:[A-Za-z0-9.]+]], i32 0, i32 0
+  // CHECK-NEXT: %[[VAR19:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }* %[[VAR10]], i32 0, i32 1
+  // CHECK-NEXT: store i16 %[[VAR16]], i16* %[[VAR18]]
+  // CHECK-NEXT: store i16 %[[VAR17]], i16* %[[VAR19]]
+
+  _Complex char c  = va_arg(ap, _Complex char);
+  // CHECK: %[[VAR60:[A-Za-z0-9.]+]] = load i8** %[[VAR100:[A-Za-z0-9.]+]]
+  // CHECK-NEXT: %[[VAR61:[A-Za-z0-9.]+]] = getelementptr i8* %[[VAR60]], i64 16
+  // CHECK-NEXT: store i8* %[[VAR61]], i8** %[[VAR100]]
+  // CHECK: %[[VAR21:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
+  // CHECK-NEXT: %[[VAR22:[A-Za-z0-9.]+]] = add i64 %[[VAR21]], 7
+  // CHECK-NEXT: %[[VAR23:[A-Za-z0-9.]+]] = add i64 %[[VAR21]], 15
+  // CHECK-NEXT: %[[VAR24:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR22]] to i8*
+  // CHECK-NEXT: %[[VAR25:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR23]] to i8*
+  // CHECK-NEXT: %[[VAR26:[A-Za-z0-9.]+]] = load i8* %[[VAR24]]
+  // CHECK-NEXT: %[[VAR27:[A-Za-z0-9.]+]] = load i8* %[[VAR25]]
+  // CHECK-NEXT: %[[VAR28:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }* %[[VAR20:[A-Za-z0-9.]+]], i32 0, i32 0
+  // CHECK-NEXT: %[[VAR29:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }* %[[VAR20]], i32 0, i32 1
+  // CHECK-NEXT: store i8 %[[VAR26]], i8* %[[VAR28]]
+  // CHECK-NEXT: store i8 %[[VAR27]], i8* %[[VAR29]]
+
+  _Complex float f = va_arg(ap, _Complex float);
+  // CHECK: %[[VAR70:[A-Za-z0-9.]+]] = load i8** %[[VAR100:[A-Za-z0-9.]+]]
+  // CHECK-NEXT: %[[VAR71:[A-Za-z0-9.]+]] = getelementptr i8* %[[VAR70]], i64 16
+  // CHECK-NEXT: store i8* %[[VAR71]], i8** %[[VAR100]]
+  // CHECK: %[[VAR31:[A-Za-z0-9.]+]] = ptrtoint i8* %{{[A-Za-z0-9.]+}} to i64
+  // CHECK-NEXT: %[[VAR32:[A-Za-z0-9.]+]] = add i64 %[[VAR31]], 4
+  // CHECK-NEXT: %[[VAR33:[A-Za-z0-9.]+]] = add i64 %[[VAR31]], 12
+  // CHECK-NEXT: %[[VAR34:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR32]] to float*
+  // CHECK-NEXT: %[[VAR35:[A-Za-z0-9.]+]] = inttoptr i64 %[[VAR33]] to float*
+  // CHECK-NEXT: %[[VAR36:[A-Za-z0-9.]+]] = load float* %[[VAR34]]
+  // CHECK-NEXT: %[[VAR37:[A-Za-z0-9.]+]] = load float* %[[VAR35]]
+  // CHECK-NEXT: %[[VAR38:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }* %[[VAR30:[A-Za-z0-9.]+]], i32 0, i32 0
+  // CHECK-NEXT: %[[VAR39:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }* %[[VAR30]], i32 0, i32 1
+  // CHECK-NEXT: store float %[[VAR36]], float* %[[VAR38]]
+  // CHECK-NEXT: store float %[[VAR37]], float* %[[VAR39]]
+}