r371269 - Use musttail for variadic method thunks when possible

Reid Kleckner via cfe-commits cfe-commits at lists.llvm.org
Fri Sep 6 15:55:26 PDT 2019


Author: rnk
Date: Fri Sep  6 15:55:26 2019
New Revision: 371269

URL: http://llvm.org/viewvc/llvm-project?rev=371269&view=rev
Log:
Use musttail for variadic method thunks when possible

This avoids cloning variadic virtual methods when the target supports
musttail and the return type is not covariant. I think we never
implemented this previously because it doesn't handle the covariant
case. But, in the MS ABI, there are some cases where vtable thunks must
be emitted even when the variadic method defintion is not available, so
it looks like we need to implement this. Do it for both ABIs, since it's
a nice size improvement and simplification for Itanium.

Emit an error when emitting thunks for variadic methods with a covariant
return type. This case is essentially not implementable unless the ABI
provides a way to perfectly forward variadic arguments without a tail
call.

Fixes PR43173.

Differential Revision: https://reviews.llvm.org/D67028

Added:
    cfe/trunk/test/CodeGenCXX/ms-thunks-variadic-return.cpp
Modified:
    cfe/trunk/lib/CodeGen/CGVTables.cpp
    cfe/trunk/test/CodeGenCXX/linetable-virtual-variadic.cpp
    cfe/trunk/test/CodeGenCXX/thunks.cpp

Modified: cfe/trunk/lib/CodeGen/CGVTables.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGVTables.cpp?rev=371269&r1=371268&r2=371269&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGVTables.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGVTables.cpp Fri Sep  6 15:55:26 2019
@@ -166,6 +166,15 @@ CodeGenFunction::GenerateVarArgsThunk(ll
   llvm::Value *Callee = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true);
   llvm::Function *BaseFn = cast<llvm::Function>(Callee);
 
+  // Cloning can't work if we don't have a definition. The Microsoft ABI may
+  // require thunks when a definition is not available. Emit an error in these
+  // cases.
+  if (!MD->isDefined()) {
+    CGM.ErrorUnsupported(MD, "return-adjusting thunk with variadic arguments");
+    return Fn;
+  }
+  assert(!BaseFn->isDeclaration() && "cannot clone undefined variadic method");
+
   // Clone to thunk.
   llvm::ValueToValueMapTy VMap;
 
@@ -201,6 +210,8 @@ CodeGenFunction::GenerateVarArgsThunk(ll
   Builder.SetInsertPoint(&*ThisStore);
   llvm::Value *AdjustedThisPtr =
       CGM.getCXXABI().performThisAdjustment(*this, ThisPtr, Thunk.This);
+  AdjustedThisPtr = Builder.CreateBitCast(AdjustedThisPtr,
+                                          ThisStore->getOperand(0)->getType());
   ThisStore->setOperand(0, AdjustedThisPtr);
 
   if (!Thunk.Return.isEmpty()) {
@@ -291,14 +302,17 @@ void CodeGenFunction::EmitCallAndReturnF
                           *this, LoadCXXThisAddress(), Thunk->This)
           : LoadCXXThis();
 
-  if (CurFnInfo->usesInAlloca() || IsUnprototyped) {
-    // We don't handle return adjusting thunks, because they require us to call
-    // the copy constructor.  For now, fall through and pretend the return
-    // adjustment was empty so we don't crash.
+  // If perfect forwarding is required a variadic method, a method using
+  // inalloca, or an unprototyped thunk, use musttail. Emit an error if this
+  // thunk requires a return adjustment, since that is impossible with musttail.
+  if (CurFnInfo->usesInAlloca() || CurFnInfo->isVariadic() || IsUnprototyped) {
     if (Thunk && !Thunk->Return.isEmpty()) {
       if (IsUnprototyped)
         CGM.ErrorUnsupported(
             MD, "return-adjusting thunk with incomplete parameter type");
+      else if (CurFnInfo->isVariadic())
+        llvm_unreachable("shouldn't try to emit musttail return-adjusting "
+                         "thunks for variadic functions");
       else
         CGM.ErrorUnsupported(
             MD, "non-trivial argument copy for return-adjusting thunk");
@@ -549,16 +563,32 @@ llvm::Constant *CodeGenVTables::maybeEmi
 
   CGM.SetLLVMFunctionAttributesForDefinition(GD.getDecl(), ThunkFn);
 
+  // Thunks for variadic methods are special because in general variadic
+  // arguments cannot be perferctly forwarded. In the general case, clang
+  // implements such thunks by cloning the original function body. However, for
+  // thunks with no return adjustment on targets that support musttail, we can
+  // use musttail to perfectly forward the variadic arguments.
+  bool ShouldCloneVarArgs = false;
   if (!IsUnprototyped && ThunkFn->isVarArg()) {
-    // Varargs thunks are special; we can't just generate a call because
-    // we can't copy the varargs.  Our implementation is rather
-    // expensive/sucky at the moment, so don't generate the thunk unless
-    // we have to.
-    // FIXME: Do something better here; GenerateVarArgsThunk is extremely ugly.
+    ShouldCloneVarArgs = true;
+    if (TI.Return.isEmpty()) {
+      switch (CGM.getTriple().getArch()) {
+      case llvm::Triple::x86_64:
+      case llvm::Triple::x86:
+      case llvm::Triple::aarch64:
+        ShouldCloneVarArgs = false;
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  if (ShouldCloneVarArgs) {
     if (UseAvailableExternallyLinkage)
       return ThunkFn;
-    ThunkFn = CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD,
-                                                        TI);
+    ThunkFn =
+        CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, TI);
   } else {
     // Normal thunk body generation.
     CodeGenFunction(CGM).generateThunk(ThunkFn, FnInfo, GD, TI, IsUnprototyped);

Modified: cfe/trunk/test/CodeGenCXX/linetable-virtual-variadic.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/linetable-virtual-variadic.cpp?rev=371269&r1=371268&r2=371269&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/linetable-virtual-variadic.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/linetable-virtual-variadic.cpp Fri Sep  6 15:55:26 2019
@@ -1,5 +1,6 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -debug-info-kind=line-directives-only %s -o - | FileCheck %s
+// Sparc64 is used because AArch64 and X86_64 would both use musttail.
+// RUN: %clang_cc1 -triple sparc64-linux-gnu -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple sparc64-linux-gnu -emit-llvm -debug-info-kind=line-directives-only %s -o - | FileCheck %s
 // Crasher for PR22929.
 class Base {
   virtual void VariadicFunction(...);

Added: cfe/trunk/test/CodeGenCXX/ms-thunks-variadic-return.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/ms-thunks-variadic-return.cpp?rev=371269&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenCXX/ms-thunks-variadic-return.cpp (added)
+++ cfe/trunk/test/CodeGenCXX/ms-thunks-variadic-return.cpp Fri Sep  6 15:55:26 2019
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -fno-rtti-data -triple x86_64-windows-msvc -emit-llvm-only %s -verify
+
+// Verify that we error out on this return adjusting thunk that we can't emit.
+
+struct A {
+  virtual A *clone(const char *f, ...) = 0;
+};
+struct B : virtual A {
+  // expected-error at +1 2 {{cannot compile this return-adjusting thunk with variadic arguments yet}}
+  B *clone(const char *f, ...) override;
+};
+struct C : B { int c; };
+C c;

Modified: cfe/trunk/test/CodeGenCXX/thunks.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/thunks.cpp?rev=371269&r1=371268&r2=371269&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/thunks.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/thunks.cpp Fri Sep  6 15:55:26 2019
@@ -1,6 +1,20 @@
-// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONOPT %s
-// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -debug-info-kind=standalone -dwarf-version=5 -munwind-tables -emit-llvm -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONOPT --check-prefix=CHECK-DBG %s
-// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes | FileCheck --check-prefix=CHECK --check-prefix=CHECK-OPT %s
+// Sparc64 doesn't support musttail (yet), so it uses method cloning for
+// variadic thunks. Use it for testing.
+// RUN: %clang_cc1 %s -triple=sparc64-pc-linux-gnu -munwind-tables -emit-llvm -o - \
+// RUN:     | FileCheck --check-prefixes=CHECK,CHECK-CLONE,CHECK-NONOPT %s
+// RUN: %clang_cc1 %s -triple=sparc64-pc-linux-gnu -debug-info-kind=standalone -dwarf-version=5 -munwind-tables -emit-llvm -o - \
+// RUN:     | FileCheck --check-prefixes=CHECK,CHECK-CLONE,CHECK-NONOPT,CHECK-DBG %s
+// RUN: %clang_cc1 %s -triple=sparc64-pc-linux-gnu -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes \
+// RUN:     | FileCheck --check-prefixes=CHECK,CHECK-CLONE,CHECK-OPT %s
+
+// Test x86_64, which uses musttail for variadic thunks.
+// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes \
+// RUN:     | FileCheck --check-prefixes=CHECK,CHECK-TAIL,CHECK-OPT %s
+
+// Finally, reuse these tests for the MS ABI.
+// RUN: %clang_cc1 %s -triple=x86_64-windows-msvc -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes \
+// RUN:     | FileCheck --check-prefixes=WIN64 %s
+
 
 namespace Test1 {
 
@@ -23,6 +37,11 @@ struct C : A, B {
 // CHECK-LABEL: define void @_ZThn8_N5Test11C1fEv(
 // CHECK-DBG-NOT: dbg.declare
 // CHECK: ret void
+//
+// WIN64-LABEL: define dso_local void @"?f at C@Test1@@UEAAXXZ"(
+// WIN64-LABEL: define linkonce_odr dso_local void @"?f at C@Test1@@W7EAAXXZ"(
+// WIN64: getelementptr i8, i8* {{.*}}, i32 -8
+// WIN64: ret void
 void C::f() { }
 
 }
@@ -45,6 +64,10 @@ struct B : virtual A {
 // CHECK: ret void
 void B::f() { }
 
+// No thunk is used for this case in the MS ABI.
+// WIN64-LABEL: define dso_local void @"?f at B@Test2@@UEAAXXZ"(
+// WIN64-NOT: define {{.*}} void @"?f at B@Test2
+
 }
 
 namespace Test3 {
@@ -65,6 +88,7 @@ struct B : A {
 };
 
 // CHECK: define %{{.*}}* @_ZTch0_v0_n24_N5Test31B1fEv(
+// WIN64: define weak_odr dso_local %{{.*}} @"?f at B@Test3@@QEAAPEAUV1 at 2@XZ"(
 V2 *B::f() { return 0; }
 
 }
@@ -92,6 +116,10 @@ struct __attribute__((visibility("protec
 // CHECK: ret void
 void C::f() { }
 
+// Visibility doesn't matter on COFF, but whatever. We could add an ELF test
+// mode later.
+// WIN64-LABEL: define protected void @"?f at C@Test4@@UEAAXXZ"(
+// WIN64-LABEL: define linkonce_odr protected void @"?f at C@Test4@@W7EAAXXZ"(
 }
 
 // Check that the thunk gets internal linkage.
@@ -119,6 +147,8 @@ namespace Test4B {
     c.f();
   }
 }
+// Not sure why this isn't delayed like in Itanium.
+// WIN64-LABEL: define internal void @"?f at C@?A0xAEF74CE7 at Test4B@@UEAAXXZ"(
 
 namespace Test5 {
 
@@ -134,6 +164,7 @@ struct B : virtual A {
 void f(B b) {
   b.f();
 }
+// No thunk in MS ABI in this case.
 }
 
 namespace Test6 {
@@ -178,6 +209,10 @@ namespace Test6 {
   // CHECK: {{call void @_ZN5Test66Thunks1fEv.*sret}}
   // CHECK: ret void
   X Thunks::f() { return X(); }
+
+  // WIN64-LABEL: define linkonce_odr dso_local void @"?f at Thunks@Test6@@WBA at EAA?AUX at 2@XZ"({{.*}} sret %{{.*}})
+  // WIN64-NOT: memcpy
+  // WIN64: tail call void @"?f at Thunks@Test6@@UEAA?AUX at 2@XZ"({{.*}} sret %{{.*}})
 }
 
 namespace Test7 {
@@ -224,6 +259,8 @@ namespace Test7 {
   // CHECK-NOT: memcpy
   // CHECK: ret void
   void testD() { D d; }
+
+  // MS C++ ABI doesn't use a thunk, so this case isn't interesting.
 }
 
 namespace Test8 {
@@ -241,6 +278,8 @@ namespace Test8 {
   // CHECK-NOT: memcpy
   // CHECK: ret void
   void C::bar(NonPOD var) {}
+
+  // MS C++ ABI doesn't use a thunk, so this case isn't interesting.
 }
 
 // PR7241: Emitting thunks for a method shouldn't require the vtable for
@@ -287,6 +326,16 @@ namespace Test11 {
   // CHECK: define {{.*}} @_ZTch0_v0_n32_N6Test111C1fEv(
   // CHECK-DBG-NOT: dbg.declare
   // CHECK: ret
+
+  // WIN64-LABEL: define dso_local %{{.*}}* @"?f at C@Test11@@UEAAPEAU12 at XZ"(i8*
+
+  // WIN64-LABEL: define weak_odr dso_local %{{.*}}* @"?f at C@Test11@@QEAAPEAUA at 2@XZ"(i8*
+  // WIN64: call %{{.*}}* @"?f at C@Test11@@UEAAPEAU12 at XZ"(i8* %{{.*}})
+  //
+  // Match the vbtable return adjustment.
+  // WIN64: load i32*, i32** %{{[^,]*}}, align 8
+  // WIN64: getelementptr inbounds i32, i32* %{{[^,]*}}, i32 1
+  // WIN64: load i32, i32* %{{[^,]*}}, align 4
 }
 
 // Varargs thunk test.
@@ -301,7 +350,8 @@ namespace Test12 {
     virtual void c();
     virtual C* f(int x, ...);
   };
-  C* C::f(int x, ...) { return this; }
+  C* makeC();
+  C* C::f(int x, ...) { return makeC(); }
 
   // C::f
   // CHECK: define {{.*}} @_ZN6Test121C1fEiz
@@ -312,6 +362,28 @@ namespace Test12 {
   // CHECK-DBG-NOT: dbg.declare
   // CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 -8
   // CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 8
+
+  // The vtable layout goes:
+  // C vtable in A:
+  // - f impl, no adjustment
+  // C vtable in B:
+  // - f thunk 2, covariant, clone
+  // - f thunk 2, musttail this adjust to impl
+  // FIXME: The weak_odr linkage is probably not necessary and just an artifact
+  // of Itanium ABI details.
+  // WIN64-LABEL: define dso_local {{.*}} @"?f at C@Test12@@UEAAPEAU12 at HZZ"(
+  // WIN64: call %{{.*}}* @"?makeC at Test12@@YAPEAUC at 1@XZ"()
+  //
+  // This thunk needs return adjustment, clone.
+  // WIN64-LABEL: define weak_odr dso_local {{.*}} @"?f at C@Test12@@W7EAAPEAUB at 2@HZZ"(
+  // WIN64: call %{{.*}}* @"?makeC at Test12@@YAPEAUC at 1@XZ"()
+  // WIN64: getelementptr inbounds i8, i8* %{{.*}}, i32 8
+  //
+  // Musttail call back to the A implementation after this adjustment from B to A.
+  // WIN64-LABEL: define linkonce_odr dso_local %{{.*}}* @"?f at C@Test12@@W7EAAPEAU12 at HZZ"(
+  // WIN64: getelementptr i8, i8* %{{[^,]*}}, i32 -8
+  // WIN64: musttail call {{.*}} @"?f at C@Test12@@UEAAPEAU12 at HZZ"(
+  C c;
 }
 
 // PR13832
@@ -339,6 +411,17 @@ namespace Test13 {
   // CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 -24
   // CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 8
   // CHECK: ret %"struct.Test13::D"*
+
+  // WIN64-LABEL: define weak_odr dso_local dereferenceable(32) %"struct.Test13::D"* @"?foo1 at D@Test13@@$4PPPPPPPE at A@EAAAEAUB1 at 2@XZ"(
+  //    This adjustment.
+  // WIN64: getelementptr inbounds i8, i8* {{.*}}, i64 -12
+  //    Call implementation.
+  // WIN64: call {{.*}} @"?foo1 at D@Test13@@UEAAAEAU12 at XZ"(i8* {{.*}})
+  //    Virtual + nonvirtual return adjustment.
+  // WIN64: load i32*, i32** %{{[^,]*}}, align 8
+  // WIN64: getelementptr inbounds i32, i32* %{{[^,]*}}, i32 1
+  // WIN64: load i32, i32* %{{[^,]*}}, align 4
+  // WIN64: getelementptr inbounds i8, i8* %{{[^,]*}}, i32 %{{[^,]*}}
 }
 
 namespace Test14 {
@@ -374,9 +457,16 @@ namespace Test15 {
   void C::c() {}
 
   // C::c
-  // CHECK: declare void @_ZN6Test151C1fEiz
+  // CHECK-CLONE: declare void @_ZN6Test151C1fEiz
   // non-virtual thunk to C::f
-  // CHECK: declare void @_ZThn8_N6Test151C1fEiz
+  // CHECK-CLONE: declare void @_ZThn8_N6Test151C1fEiz
+
+  // If we have musttail, then we emit the thunk as available_externally.
+  // CHECK-TAIL: declare void @_ZN6Test151C1fEiz
+  // CHECK-TAIL: define available_externally void @_ZThn8_N6Test151C1fEiz({{.*}})
+  // CHECK-TAIL: musttail call void (%"struct.Test15::C"*, i32, ...) @_ZN6Test151C1fEiz({{.*}}, ...)
+
+  // MS C++ ABI doesn't use a thunk, so this case isn't interesting.
 }
 
 namespace Test16 {
@@ -398,6 +488,33 @@ D::~D() {}
 // CHECK: ret void
 }
 
+namespace Test17 {
+class A {
+  virtual void f(const char *, ...);
+};
+class B {
+  virtual void f(const char *, ...);
+};
+class C : A, B {
+  virtual void anchor();
+  void f(const char *, ...) override;
+};
+// Key method and object anchor vtable for Itanium and MSVC.
+void C::anchor() {}
+C c;
+
+// CHECK-CLONE-LABEL: declare void @_ZThn8_N6Test171C1fEPKcz(
+
+// CHECK-TAIL-LABEL: define available_externally void @_ZThn8_N6Test171C1fEPKcz(
+// CHECK-TAIL: getelementptr inbounds i8, i8* %{{.*}}, i64 -8
+// CHECK-TAIL: musttail call {{.*}} @_ZN6Test171C1fEPKcz({{.*}}, ...)
+
+// MSVC-LABEL: define linkonce_odr dso_local void @"?f at C@Test17@@G7EAAXPEBDZZ"
+// MSVC-SAME: (%"class.Test17::C"* %this, i8* %[[ARG:[^,]+]], ...)
+// MSVC: getelementptr i8, i8* %{{.*}}, i32 -8
+// MSVC: musttail call void (%"class.Test17::C"*, i8*, ...) @"?f at C@Test17@@EEAAXPEBDZZ"(%"class.Test17::C"* %{{.*}}, i8* %[[ARG]], ...)
+}
+
 /**** The following has to go at the end of the file ****/
 
 // checking without opt
@@ -421,5 +538,9 @@ D::~D() {}
 // CHECK-OPT-LABEL: define linkonce_odr void @_ZN6Test101C3fooEv
 // CHECK-OPT-LABEL: define linkonce_odr void @_ZThn8_N6Test101C3fooEv
 
+// This is from Test10:
+// WIN64-LABEL: define linkonce_odr dso_local void @"?foo at C@Test10@@UEAAXXZ"(
+// WIN64-LABEL: define linkonce_odr dso_local void @"?foo at C@Test10@@W7EAAXXZ"(
+
 // CHECK-NONOPT: attributes [[NUW]] = { noinline nounwind optnone uwtable{{.*}} }
 // CHECK-OPT: attributes [[NUW]] = { nounwind uwtable{{.*}} }




More information about the cfe-commits mailing list