r268261 - Expand aggregate arguments more often on 32-bit Windows

Mon May 2 10:41:08 PDT 2016

Author: rnk
Date: Mon May  2 12:41:07 2016
New Revision: 268261

URL: http://llvm.org/viewvc/llvm-project?rev=268261&view=rev
Log:
Expand aggregate arguments more often on 32-bit Windows

Before this change, we would pass all non-HFA record arguments on
Windows with byval. Byval often blocks optimizations and results in bad
code generation. Windows now uses the existing workaround that other
x86_32 platforms use.

I also expanded the workaround to handle C++ records with constructors
on Windows. On non-Windows platforms, we have to keep generating the
same LLVM IR prototypes if we want our bitcode to be ABI compatible.
Otherwise we will encounter mismatch issues like PR21573.

Essentially fixes PR27522 in Clang instead of LLVM.

Reviewers: hans

Differential Revision: http://reviews.llvm.org/D19756

Modified:
    cfe/trunk/lib/CodeGen/TargetInfo.cpp
    cfe/trunk/test/CodeGen/vectorcall.c
    cfe/trunk/test/CodeGen/windows-struct-abi.c
    cfe/trunk/test/CodeGen/x86_32-arguments-win32.c
    cfe/trunk/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp
    cfe/trunk/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=268261&r1=268260&r2=268261&view=diff
==============================================================================

--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Mon May  2 12:41:07 2016
@@ -503,72 +503,6 @@ static const Type *isSingleElementStruct
   return Found;
 }
 
-static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
-  // Treat complex types as the element type.
-  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
-    Ty = CTy->getElementType();
-
-  // Check for a type which we know has a simple scalar argument-passing
-  // convention without any padding.  (We're specifically looking for 32
-  // and 64-bit integer and integer-equivalents, float, and double.)
-  if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
-      !Ty->isEnumeralType() && !Ty->isBlockPointerType())
-    return false;
-
-  uint64_t Size = Context.getTypeSize(Ty);
-  return Size == 32 || Size == 64;
-}
-
-/// canExpandIndirectArgument - Test whether an argument type which is to be
-/// passed indirectly (on the stack) would have the equivalent layout if it was
-/// expanded into separate arguments. If so, we prefer to do the latter to avoid
-/// inhibiting optimizations.
-///
-// FIXME: This predicate is missing many cases, currently it just follows
-// llvm-gcc (checks that all fields are 32-bit or 64-bit primitive types). We
-// should probably make this smarter, or better yet make the LLVM backend
-// capable of handling it.
-static bool canExpandIndirectArgument(QualType Ty, ASTContext &Context) {
-  // We can only expand structure types.
-  const RecordType *RT = Ty->getAs<RecordType>();
-  if (!RT)
-    return false;
-
-  // We can only expand (C) structures.
-  //
-  // FIXME: This needs to be generalized to handle classes as well.
-  const RecordDecl *RD = RT->getDecl();
-  if (!RD->isStruct())
-    return false;
-
-  // We try to expand CLike CXXRecordDecl.
-  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
-    if (!CXXRD->isCLike())
-      return false;
-  }
-
-  uint64_t Size = 0;
-
-  for (const auto *FD : RD->fields()) {
-    if (!is32Or64BitBasicType(FD->getType(), Context))
-      return false;
-
-    // FIXME: Reject bit-fields wholesale; there are two problems, we don't know
-    // how to expand them yet, and the predicate for telling if a bitfield still
-    // counts as "basic" is more complicated than what we were doing previously.
-    if (FD->isBitField())
-      return false;
-
-    Size += Context.getTypeSize(FD->getType());
-  }
-
-  // Make sure there are not any holes in the struct.
-  if (Size != Context.getTypeSize(Ty))
-    return false;
-
-  return true;
-}
-
 namespace {
 Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                        const ABIArgInfo &AI) {
@@ -959,6 +893,8 @@ class X86_32ABIInfo : public SwiftABIInf
                                 bool &NeedsPadding) const;
   bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;
 
+  bool canExpandIndirectArgument(QualType Ty) const;
+
   /// \brief Rewrite the function info so that all memory arguments use
   /// inalloca.
   void rewriteWithInAlloca(CGFunctionInfo &FI) const;
@@ -1179,6 +1115,72 @@ bool X86_32ABIInfo::shouldReturnTypeInRe
   return true;
 }
 
+static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
+  // Treat complex types as the element type.
+  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
+    Ty = CTy->getElementType();
+
+  // Check for a type which we know has a simple scalar argument-passing
+  // convention without any padding.  (We're specifically looking for 32
+  // and 64-bit integer and integer-equivalents, float, and double.)
+  if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
+      !Ty->isEnumeralType() && !Ty->isBlockPointerType())
+    return false;
+
+  uint64_t Size = Context.getTypeSize(Ty);
+  return Size == 32 || Size == 64;
+}
+
+/// Test whether an argument type which is to be passed indirectly (on the
+/// stack) would have the equivalent layout if it was expanded into separate
+/// arguments. If so, we prefer to do the latter to avoid inhibiting
+/// optimizations.
+bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
+  // We can only expand structure types.
+  const RecordType *RT = Ty->getAs<RecordType>();
+  if (!RT)
+    return false;
+  const RecordDecl *RD = RT->getDecl();
+  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+    if (!IsWin32StructABI ) {
+      // On non-Windows, we have to conservatively match our old bitcode
+      // prototypes in order to be ABI-compatible at the bitcode level.
+      if (!CXXRD->isCLike())
+        return false;
+    } else {
+      // Don't do this for dynamic classes.
+      if (CXXRD->isDynamicClass())
+        return false;
+      // Don't do this if there are any non-empty bases.
+      for (const CXXBaseSpecifier &Base : CXXRD->bases()) {
+        if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true))
+          return false;
+      }
+    }
+  }
+
+  uint64_t Size = 0;
+
+  for (const auto *FD : RD->fields()) {
+    // Scalar arguments on the stack get 4 byte alignment on x86. If the
+    // argument is smaller than 32-bits, expanding the struct will create
+    // alignment padding.
+    if (!is32Or64BitBasicType(FD->getType(), getContext()))
+      return false;
+
+    // FIXME: Reject bit-fields wholesale; there are two problems, we don't know
+    // how to expand them yet, and the predicate for telling if a bitfield still
+    // counts as "basic" is more complicated than what we were doing previously.
+    if (FD->isBitField())
+      return false;
+
+    Size += getContext().getTypeSize(FD->getType());
+  }
+
+  // We can do this if there was no alignment padding.
+  return Size == getContext().getTypeSize(Ty);
+}
+
 ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const {
   // If the return value is indirect, then the hidden argument is consuming one
   // integer register.
@@ -1395,6 +1397,12 @@ bool X86_32ABIInfo::updateFreeRegs(QualT
 bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, 
                                              bool &InReg,
                                              bool &NeedsPadding) const {
+  // On Windows, aggregates other than HFAs are never passed in registers, and
+  // they do not consume register slots. Homogenous floating-point aggregates
+  // (HFAs) have already been dealt with at this point.
+  if (IsWin32StructABI && isAggregateTypeForABI(Ty))
+    return false;
+
   NeedsPadding = false;
   InReg = !IsMCUABI;
 
@@ -1468,23 +1476,19 @@ ABIArgInfo X86_32ABIInfo::classifyArgume
   }
 
   if (isAggregateTypeForABI(Ty)) {
-    if (RT) {
-      // Structs are always byval on win32, regardless of what they contain.
-      if (IsWin32StructABI)
-        return getIndirectResult(Ty, true, State);
-
-      // Structures with flexible arrays are always indirect.
-      if (RT->getDecl()->hasFlexibleArrayMember())
-        return getIndirectResult(Ty, true, State);
-    }
+    // Structures with flexible arrays are always indirect.
+    // FIXME: This should not be byval!
+    if (RT && RT->getDecl()->hasFlexibleArrayMember())
+      return getIndirectResult(Ty, true, State);
 
-    // Ignore empty structs/unions.
-    if (isEmptyRecord(getContext(), Ty, true))
+    // Ignore empty structs/unions on non-Windows.
+    if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
       return ABIArgInfo::getIgnore();
 
     llvm::LLVMContext &LLVMContext = getVMContext();
     llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
-    bool NeedsPadding, InReg;
+    bool NeedsPadding = false;
+    bool InReg;
     if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
       unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
       SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
@@ -1502,9 +1506,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgume
     // optimizations.
     // Don't do this for the MCU if there are still free integer registers
     // (see X86_64 ABI for full explanation).
-    if (getContext().getTypeSize(Ty) <= 4*32 &&
-        canExpandIndirectArgument(Ty, getContext()) &&
-        (!IsMCUABI || State.FreeRegs == 0))
+    if (getContext().getTypeSize(Ty) <= 4 * 32 &&
+        (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty))
       return ABIArgInfo::getExpandWithPadding(
           State.CC == llvm::CallingConv::X86_FastCall ||
               State.CC == llvm::CallingConv::X86_VectorCall,
@@ -1624,11 +1627,14 @@ static bool isArgInAlloca(const ABIArgIn
     return false;
   case ABIArgInfo::Direct:
   case ABIArgInfo::Extend:
-  case ABIArgInfo::Expand:
-  case ABIArgInfo::CoerceAndExpand:
     if (Info.getInReg())
       return false;
     return true;
+  case ABIArgInfo::Expand:
+  case ABIArgInfo::CoerceAndExpand:
+    // These are aggregate types which are never passed in registers when
+    // inalloca is involved.
+    return true;
   }
   llvm_unreachable("invalid enum");
 }

Modified: cfe/trunk/test/CodeGen/vectorcall.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/vectorcall.c?rev=268261&r1=268260&r2=268261&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/vectorcall.c (original)
+++ cfe/trunk/test/CodeGen/vectorcall.c Mon May  2 12:41:07 2016
@@ -9,9 +9,9 @@ void __vectorcall v2(char a, char b) {}
 // CHECK: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b)
 // X64: define x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b)
 
-struct Small { int a; };
+struct Small { int x; };
 void __vectorcall v3(int a, struct Small b, int c) {}
-// CHECK: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, %struct.Small* byval align 4 %b, i32 inreg %c)
+// CHECK: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c)
 // X64: define x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c)
 
 struct Large { int a[5]; };

Modified: cfe/trunk/test/CodeGen/windows-struct-abi.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/windows-struct-abi.c?rev=268261&r1=268260&r2=268261&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/windows-struct-abi.c (original)
+++ cfe/trunk/test/CodeGen/windows-struct-abi.c Mon May  2 12:41:07 2016
@@ -10,7 +10,7 @@ struct f1 return_f1(void) { while (1); }
 
 void receive_f1(struct f1 a0) { }
 
-// CHECK: define void @receive_f1(%struct.f1* byval align 4 %a0)
+// CHECK: define void @receive_f1(float %a0.0)
 
 struct f2 {
   float f;
@@ -23,7 +23,7 @@ struct f2 return_f2(void) { while (1); }
 
 void receive_f2(struct f2 a0) { }
 
-// CHECK: define void @receive_f2(%struct.f2* byval align 4 %a0)
+// CHECK: define void @receive_f2(float %a0.0, float %a0.1)
 
 struct f4 {
   float f;
@@ -38,5 +38,5 @@ struct f4 return_f4(void) { while (1); }
 
 void receive_f4(struct f4 a0) { }
 
-// CHECK: define void @receive_f4(%struct.f4* byval align 4 %a0)
+// CHECK: define void @receive_f4(float %a0.0, float %a0.1, float %a0.2, float %a0.3)
 

Modified: cfe/trunk/test/CodeGen/x86_32-arguments-win32.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86_32-arguments-win32.c?rev=268261&r1=268260&r2=268261&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/x86_32-arguments-win32.c (original)
+++ cfe/trunk/test/CodeGen/x86_32-arguments-win32.c Mon May  2 12:41:07 2016
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -w -triple i386-pc-win32 -emit-llvm -o - %s | FileCheck %s
 
 // CHECK-LABEL: define i64 @f1_1()
-// CHECK-LABEL: define void @f1_2(%struct.s1* byval align 4 %a0)
+// CHECK-LABEL: define void @f1_2(i32 %a0.0, i32 %a0.1)
 struct s1 {
   int a;
   int b;
@@ -31,7 +31,7 @@ struct s4 {
 struct s4 f4_1(void) { while (1) {} }
 
 // CHECK-LABEL: define i64 @f5_1()
-// CHECK-LABEL: define void @f5_2(%struct.s5* byval align 4)
+// CHECK-LABEL: define void @f5_2(double %a0.0)
 struct s5 {
   double a;
 };
@@ -39,7 +39,7 @@ struct s5 f5_1(void) { while (1) {} }
 void f5_2(struct s5 a0) {}
 
 // CHECK-LABEL: define i32 @f6_1()
-// CHECK-LABEL: define void @f6_2(%struct.s6* byval align 4 %a0)
+// CHECK-LABEL: define void @f6_2(float %a0.0)
 struct s6 {
   float a;
 };

Modified: cfe/trunk/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp?rev=268261&r1=268260&r2=268261&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp Mon May  2 12:41:07 2016
@@ -2,10 +2,10 @@
 
 // PR15768
 
-// A trivial 12 byte struct is returned indirectly.
+// A trivial 20 byte struct is returned indirectly and taken as byval.
 struct S {
   S();
-  int a, b, c;
+  int a, b, c, d, e;
 };
 
 struct C {

Modified: cfe/trunk/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp?rev=268261&r1=268260&r2=268261&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp Mon May  2 12:41:07 2016
@@ -22,6 +22,16 @@ struct SmallWithCtor {
   int x;
 };
 
+struct Multibyte {
+  char a, b, c, d;
+};
+
+struct Packed {
+  short a;
+  int b;
+  short c;
+};
+
 struct SmallWithDtor {
   SmallWithDtor();
   ~SmallWithDtor();
@@ -102,19 +112,30 @@ Big big_return() { return Big(); }
 
 void small_arg(Small s) {}
 // LINUX-LABEL: define void @_Z9small_arg5Small(i32 %s.0)
-// WIN32: define void @"\01?small_arg@@YAXUSmall@@@Z"(%struct.Small* byval align 4 %s)
+// WIN32: define void @"\01?small_arg@@YAXUSmall@@@Z"(i32 %s.0)
 // WIN64: define void @"\01?small_arg@@YAXUSmall@@@Z"(i32 %s.coerce)
 
 void medium_arg(Medium s) {}
 // LINUX-LABEL: define void @_Z10medium_arg6Medium(i32 %s.0, i32 %s.1)
-// WIN32: define void @"\01?medium_arg@@YAXUMedium@@@Z"(%struct.Medium* byval align 4 %s)
+// WIN32: define void @"\01?medium_arg@@YAXUMedium@@@Z"(i32 %s.0, i32 %s.1)
 // WIN64: define void @"\01?medium_arg@@YAXUMedium@@@Z"(i64 %s.coerce)
 
 void small_arg_with_ctor(SmallWithCtor s) {}
 // LINUX-LABEL: define void @_Z19small_arg_with_ctor13SmallWithCtor(%struct.SmallWithCtor* byval align 4 %s)
-// WIN32: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(%struct.SmallWithCtor* byval align 4 %s)
+// WIN32: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.0)
 // WIN64: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.coerce)
 
+// FIXME: We could coerce to a series of i32s here if we wanted to.
+void multibyte_arg(Multibyte s) {}
+// LINUX-LABEL: define void @_Z13multibyte_arg9Multibyte(%struct.Multibyte* byval align 4 %s)
+// WIN32: define void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"(%struct.Multibyte* byval align 4 %s)
+// WIN64: define void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"(i32 %s.coerce)
+
+void packed_arg(Packed s) {}
+// LINUX-LABEL: define void @_Z10packed_arg6Packed(%struct.Packed* byval align 4 %s)
+// WIN32: define void @"\01?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* byval align 4 %s)
+// WIN64: define void @"\01?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* %s)
+
 // Test that dtors are invoked in the callee.
 void small_arg_with_dtor(SmallWithDtor s) {}
 // WIN32: define void @"\01?small_arg_with_dtor@@YAXUSmallWithDtor@@@Z"(<{ %struct.SmallWithDtor }>* inalloca) {{.*}} {
@@ -230,12 +251,12 @@ class Class {
 
   void thiscall_method_arg(Small s) {}
   // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE5Small(%class.Class* %this, i32 %s.0)
-  // WIN32: define {{.*}} void @"\01?thiscall_method_arg at Class@@QAEXUSmall@@@Z"(%class.Class* %this, %struct.Small* byval align 4 %s)
+  // WIN32: define {{.*}} void @"\01?thiscall_method_arg at Class@@QAEXUSmall@@@Z"(%class.Class* %this, i32 %s.0)
   // WIN64: define linkonce_odr void @"\01?thiscall_method_arg at Class@@QEAAXUSmall@@@Z"(%class.Class* %this, i32 %s.coerce)
 
   void thiscall_method_arg(SmallWithCtor s) {}
   // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE13SmallWithCtor(%class.Class* %this, %struct.SmallWithCtor* byval align 4 %s)
-  // WIN32: define {{.*}} void @"\01?thiscall_method_arg at Class@@QAEXUSmallWithCtor@@@Z"(%class.Class* %this, %struct.SmallWithCtor* byval align 4 %s)
+  // WIN32: define {{.*}} void @"\01?thiscall_method_arg at Class@@QAEXUSmallWithCtor@@@Z"(%class.Class* %this, i32 %s.0)
   // WIN64: define linkonce_odr void @"\01?thiscall_method_arg at Class@@QEAAXUSmallWithCtor@@@Z"(%class.Class* %this, i32 %s.coerce)
 
   void thiscall_method_arg(Big s) {}