[llvm] c844f88 - [ThinLTO/WPD] Fix index-based WPD for available_externally vtables

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 30 18:02:15 PDT 2019


Author: Teresa Johnson
Date: 2019-10-30T17:59:08-07:00
New Revision: c844f8846aabda577a8bf3b460d4993e89475218

URL: https://github.com/llvm/llvm-project/commit/c844f8846aabda577a8bf3b460d4993e89475218
DIFF: https://github.com/llvm/llvm-project/commit/c844f8846aabda577a8bf3b460d4993e89475218.diff

LOG: [ThinLTO/WPD] Fix index-based WPD for available_externally vtables

Summary:
Clang does not add type metadata to available_externally vtables. When
choosing a summary to look at for virtual function definitions, make
sure we skip summaries for any available externally vtables as they will
not describe any virtual function functions, which are only summarized
in the presence of type metadata on the vtable def. Simply look for the
corresponding strong def's summary.

Also add handling for same-named local vtables with the same GUID
because of same-named files without enough distinguishing path.
In that case we return a conservative result with no devirtualization.

Reviewers: pcc, davidxl, evgeny777

Subscribers: mehdi_amini, inglorion, hiraditya, steven_wu, dexonsmith, arphaman, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69452

Added: 
    llvm/test/ThinLTO/X86/Inputs/devirt_available_externally.ll
    llvm/test/ThinLTO/X86/Inputs/devirt_local_same_guid.ll
    llvm/test/ThinLTO/X86/devirt_available_externally.ll
    llvm/test/ThinLTO/X86/devirt_local_same_guid.ll

Modified: 
    llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index f0cf5581ba8a..d7c750494042 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -841,17 +841,35 @@ bool DevirtIndex::tryFindVirtualCallTargets(
     std::vector<ValueInfo> &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo,
     uint64_t ByteOffset) {
   for (const TypeIdOffsetVtableInfo P : TIdInfo) {
-    // VTable initializer should have only one summary, or all copies must be
-    // linkonce/weak ODR.
+    // Ensure that we have at most one external linkage vtable initializer.
     assert(P.VTableVI.getSummaryList().size() == 1 ||
-           llvm::all_of(
+           llvm::count_if(
                P.VTableVI.getSummaryList(),
                [&](const std::unique_ptr<GlobalValueSummary> &Summary) {
-                 return GlobalValue::isLinkOnceODRLinkage(Summary->linkage()) ||
-                        GlobalValue::isWeakODRLinkage(Summary->linkage());
-               }));
-    const auto *VS = cast<GlobalVarSummary>(P.VTableVI.getSummaryList()[0].get());
-    if (!P.VTableVI.getSummaryList()[0]->isLive())
+                 return GlobalValue::isExternalLinkage(Summary->linkage());
+               }) <= 1);
+    // Find the first non-available_externally linkage vtable initializer.
+    // We can have multiple available_externally, linkonce_odr and weak_odr
+    // vtable initializers, however we want to skip available_externally as they
+    // do not have type metadata attached, and therefore the summary will not
+    // contain any vtable functions.
+    //
+    // Also, handle the case of same-named local Vtables with the same path
+    // and therefore the same GUID. This can happen if there isn't enough
+    // distinguishing path when compiling the source file. In that case we
+    // conservatively return false early.
+    const GlobalVarSummary *VS = nullptr;
+    bool LocalFound = false;
+    for (auto &S : P.VTableVI.getSummaryList()) {
+      if (GlobalValue::isLocalLinkage(S->linkage())) {
+        if (LocalFound)
+          return false;
+        LocalFound = true;
+      }
+      if (!GlobalValue::isAvailableExternallyLinkage(S->linkage()))
+        VS = cast<GlobalVarSummary>(S.get());
+    }
+    if (!VS->isLive())
       continue;
     for (auto VTP : VS->vTableFuncs()) {
       if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)

diff  --git a/llvm/test/ThinLTO/X86/Inputs/devirt_available_externally.ll b/llvm/test/ThinLTO/X86/Inputs/devirt_available_externally.ll
new file mode 100644
index 000000000000..56d6ea5fc997
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/Inputs/devirt_available_externally.ll
@@ -0,0 +1,14 @@
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.D = type { i32 (...)** }
+
+ at _ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3
+
+define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+attributes #0 = { noinline optnone }
+
+!3 = !{i64 16, !"_ZTS1D"}

diff  --git a/llvm/test/ThinLTO/X86/Inputs/devirt_local_same_guid.ll b/llvm/test/ThinLTO/X86/Inputs/devirt_local_same_guid.ll
new file mode 100644
index 000000000000..cf3d024c32a8
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/Inputs/devirt_local_same_guid.ll
@@ -0,0 +1,41 @@
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+source_filename = "-"
+
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+
+ at _ZTV1B = internal constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1
+
+define internal i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define internal i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define i32 @test2(%struct.B* %obj, i32 %a) {
+entry:
+  %0 = bitcast %struct.B* %obj to i8***
+  %vtable2 = load i8**, i8*** %0
+  %1 = bitcast i8** %vtable2 to i8*
+  %p2 = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1B")
+  call void @llvm.assume(i1 %p2)
+
+  %fptrptr = getelementptr i8*, i8** %vtable2, i32 1
+  %2 = bitcast i8** %fptrptr to i32 (%struct.B*, i32)**
+  %fptr33 = load i32 (%struct.B*, i32)*, i32 (%struct.B*, i32)** %2, align 8
+
+  %call4 = tail call i32 %fptr33(%struct.B* nonnull %obj, i32 %a)
+  ret i32 %call4
+}
+
+attributes #0 = { noinline optnone }
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTS1B"}

diff  --git a/llvm/test/ThinLTO/X86/devirt_available_externally.ll b/llvm/test/ThinLTO/X86/devirt_available_externally.ll
new file mode 100644
index 000000000000..128055f7022e
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/devirt_available_externally.ll
@@ -0,0 +1,72 @@
+; REQUIRES: x86-registered-target
+
+; Test index-based devirtualization when first copy is available_externally,
+; which doesn't have type metadata. We should use the strong external
+; def in the other module to devirtualize.
+
+; Generate unsplit module with summary for ThinLTO index-based WPD.
+; RUN: opt -thinlto-bc -o %t3.o %s
+; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_available_externally.ll
+
+; The available_externally copy should not get vTableFuncs information in its
+; summary entry, but the external def should.
+; RUN: llvm-dis -o - %t3.o | FileCheck %s --check-prefix=AVAILEXTERNAL
+; AVAILEXTERNAL: gv: (name: "_ZTV1D"
+; AVAILEXTERNAL-NOT: vTableFuncs
+; AVAILEXTERNAL-SAME: ; guid =
+; RUN: llvm-dis -o - %t4.o | FileCheck %s --check-prefix=EXTERNAL
+; EXTERNAL: gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc:
+
+; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \
+; RUN:   -wholeprogramdevirt-print-index-based \
+; RUN:   -o %t5 \
+; RUN:   -r=%t3.o,test,px \
+; RUN:   -r=%t3.o,_ZTV1D, \
+; RUN:   -r=%t3.o,_ZN1D1mEi, \
+; RUN:   -r=%t4.o,_ZN1D1mEi,p \
+; RUN:   -r=%t4.o,_ZTV1D,px \
+; RUN:   2>&1 | FileCheck %s --check-prefix=REMARK --check-prefix=PRINT
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-nm %t5.1 | FileCheck %s --check-prefix=NM-INDEX1
+; RUN: llvm-nm %t5.2 | FileCheck %s --check-prefix=NM-INDEX2
+
+; NM-INDEX1-DAG: U _ZN1D1mEi
+
+; NM-INDEX2-DAG: T _ZN1D1mEi
+
+; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1D1mEi)
+
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.D = type { i32 (...)** }
+
+ at _ZTV1D = available_externally constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }
+
+; CHECK-IR1-LABEL: define i32 @test
+define i32 @test(%struct.D* %obj2, i32 %a) {
+entry:
+  %0 = bitcast %struct.D* %obj2 to i8***
+  %vtable2 = load i8**, i8*** %0
+  %1 = bitcast i8** %vtable2 to i8*
+  %p2 = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1D")
+  call void @llvm.assume(i1 %p2)
+
+  %2 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)**
+  %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %2, align 8
+
+  ; Check that the call was devirtualized.
+  ; CHECK-IR1: %call4 = tail call i32 @_ZN1D1mEi
+  %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %a)
+  ret i32 %call4
+}
+; CHECK-IR1-LABEL: ret i32
+; CHECK-IR1-LABEL: }
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+declare i32 @_ZN1D1mEi(%struct.D* %this, i32 %a)
+
+attributes #0 = { noinline optnone }

diff  --git a/llvm/test/ThinLTO/X86/devirt_local_same_guid.ll b/llvm/test/ThinLTO/X86/devirt_local_same_guid.ll
new file mode 100644
index 000000000000..8354bc19149f
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/devirt_local_same_guid.ll
@@ -0,0 +1,74 @@
+; REQUIRES: x86-registered-target
+
+; Test that index-based devirtualization in the presence of same-named
+; local vtables in same named source files fails.
+
+; Generate unsplit module with summary for ThinLTO index-based WPD.
+; RUN: opt -thinlto-bc -o %t3.o %s
+; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_local_same_guid.ll
+
+; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \
+; RUN:   -wholeprogramdevirt-print-index-based \
+; RUN:   -o %t5 \
+; RUN:   -r=%t3.o,use_B,px \
+; RUN:   -r=%t3.o,test,px \
+; RUN:   -r=%t4.o,test2,px
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+source_filename = "-"
+
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+
+ at _ZTV1B = internal constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1nEi to i8*)] }, !type !0, !type !1
+
+define internal i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define internal i32 @_ZN1B1nEi(%struct.B* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+; Ensures that vtable of B is live so that we will attempt devirt.
+define dso_local i32 @use_B(%struct.B* %a) {
+entry:
+  %0 = bitcast %struct.B* %a to i32 (...)***
+  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV1B, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+  ret i32 0
+}
+
+; CHECK-IR1: define i32 @test(
+define i32 @test(%struct.A* %obj, i32 %a) {
+entry:
+  %0 = bitcast %struct.A* %obj to i8***
+  %vtable = load i8**, i8*** %0
+  %1 = bitcast i8** %vtable to i8*
+  %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
+  call void @llvm.assume(i1 %p)
+  %fptrptr = getelementptr i8*, i8** %vtable, i32 1
+  %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)**
+  %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8
+
+  ; Check that the call was not devirtualized.
+  ; CHECK-IR1: %call = tail call i32 %fptr1
+  %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a)
+
+  ret i32 %call
+}
+
+; CHECK-IR2: define i32 @test2
+; Check that the call was not devirtualized.
+; CHECK-IR2:   %call4 = tail call i32 %fptr
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+attributes #0 = { noinline optnone }
+
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTS1B"}


        


More information about the llvm-commits mailing list