[llvm] d99033e - [LTO][WPD] Suppress WPD on a class if the LTO unit doesn't have the prevailing definition of this class (#131721)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 22:11:01 PDT 2025
Author: Mingming Liu
Date: 2025-03-19T22:10:57-07:00
New Revision: d99033e4b4081bc47510c451b618ef93fe2f3c62
URL: https://github.com/llvm/llvm-project/commit/d99033e4b4081bc47510c451b618ef93fe2f3c62
DIFF: https://github.com/llvm/llvm-project/commit/d99033e4b4081bc47510c451b618ef93fe2f3c62.diff
LOG: [LTO][WPD] Suppress WPD on a class if the LTO unit doesn't have the prevailing definition of this class (#131721)
Before this patch, whole program devirtualization is suppressed on a
class if any superclass is visible to regular object files, by recording
the class GUID in `VisibleToRegularObjSymbols`.
This patch suppresses whole program devirtualization on a class if the
LTO unit doesn't have the prevailing definition of this class (e.g., the
prevailing definition is in a shared library)
Implementation summaries:
1. In llvm/lib/LTO/LTO.cpp, `IsVisibleToRegularObj` is updated to look
at the global resolution's `IsPrevailing` bit for ThinLTO and
regularLTO.
2. In llvm/tools/llvm-lto2/llvm-lto2.cpp,
- three command line options are added so `llvm-lto2` can override
`Conf.HasWholeProgramVisibility`, `Conf.ValidateAllVtablesHaveTypeInfos`
and `Conf.AllVtablesHaveTypeInfos`.
The test case is reduced from a small C++ program (main.cc, lib.cc/h
pasted below in [1]). To reproduce the program failure without this
patch, compile lib.cc into a shared library, and provide it to a ThinLTO
build of main.cc (commands are pasted in [2]).
[1]
* lib.h
```
#include <cstdio>
class Derived {
public:
void dispatch();
virtual void print();
virtual void sum();
};
void Derived::dispatch() {
static_cast<Derived*>(this)->print();
static_cast<Derived*>(this)->sum();
}
void Derived::sum() {
printf("Derived::sum\n");
}
__attribute__((noinline)) void* create(int i);
__attribute__((noinline)) void* getPtr(int i);
```
* lib.cc
```
#include "lib.h"
#include <cstdio>
#include <iostream>
class Derived2 : public Derived {
public:
void print() override {
printf("DerivedSharedLib\n");
}
void sum() override {
printf("DerivedSharedLib::sum\n");
}
};
void Derived::print() {
printf("Derived\n");
}
__attribute__((noinline)) void* create(int i) {
if (i & 1)
return new Derived2();
return new Derived();
}
```
* main.cc
```
cat main.cc
#include "lib.h"
class DerivedN : public Derived {
public:
};
__attribute__((noinline)) void* getPtr(int x) {
return new DerivedN();
}
int main() {
Derived*b = static_cast<Derived*>(create(201));
b->dispatch();
delete b;
Derived* a = static_cast<Derived*>(getPtr(202));
a->dispatch();
delete a;
return 0;
}
```
[2]
```
# compile lib.o in a shared library.
$ ./bin/clang++ -O2 -fPIC -c lib.cc -o lib.o
$ ./bin/clang++ -shared -o libdata.so lib.o
# Provide the shared library in `-ldata`
$ ./bin/clang++ -v -g -ldata --save-temps -fno-discard-value-names -Wl,-mllvm,-print-before=wholeprogramdevirt -Wl,-mllvm,-wholeprogramdevirt-check=trap -Rpass=wholeprogramdevirt -Wl,--lto-whole-program-visibility -Wl,--lto-validate-all-vtables-have-type-infos -mllvm -disable-icp=true -Wl,-mllvm,-disable-icp=false -flto=thin -fwhole-program-vtables -fno-split-lto-unit -fuse-ld=lld main.cc -L . -o main >/tmp/wholeprogramdevirt.ir 2>&1
# Run the program hits a segmentation fault with `-Wl,-mllvm,-wholeprogramdevirt-check=trap`
$ LD_LIBRARY_PATH=. ./main
DerivedSharedLib
Trace/breakpoint trap (core dumped)
```
Added:
llvm/test/ThinLTO/X86/devirt_prevailing.ll
Modified:
llvm/lib/LTO/LTO.cpp
llvm/tools/llvm-lto2/llvm-lto2.cpp
Removed:
################################################################################
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index e895a46b8cd77..b12be4ac86aae 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1314,7 +1314,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
// expected to be handled separately.
auto IsVisibleToRegularObj = [&](StringRef name) {
auto It = GlobalResolutions->find(name);
- return (It == GlobalResolutions->end() || It->second.VisibleOutsideSummary);
+ return (It == GlobalResolutions->end() ||
+ It->second.VisibleOutsideSummary || !It->second.Prevailing);
};
// If allowed, upgrade public vcall visibility metadata to linkage unit
@@ -1905,7 +1906,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
auto IsVisibleToRegularObj = [&](StringRef name) {
auto It = GlobalResolutions->find(name);
return (It == GlobalResolutions->end() ||
- It->second.VisibleOutsideSummary);
+ It->second.VisibleOutsideSummary || !It->second.Prevailing);
};
getVisibleToRegularObjVtableGUIDs(ThinLTO.CombinedIndex,
diff --git a/llvm/test/ThinLTO/X86/devirt_prevailing.ll b/llvm/test/ThinLTO/X86/devirt_prevailing.ll
new file mode 100644
index 0000000000000..497b8f8a8603e
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/devirt_prevailing.ll
@@ -0,0 +1,158 @@
+; RUN: rm -rf %t && mkdir %t && cd %t
+
+; Tests that devirtualization is suppressed on a class when the LTO unit doesn't
+; have the prevailing definition of the base class.
+
+; Generate unsplit module with summary for ThinLTO index-based WPD.
+; RUN: opt -thinlto-bc -o summary.o %s
+
+; Index based WPD
+; The callsite inside @_ZN4Base8dispatchEv gets devirtualized when symbol
+; resolution shows there is a prevailing definition of `_ZTI7Derived` in the
+; LTO unit.
+; RUN: llvm-lto2 run summary.o -save-temps -pass-remarks=. \
+; RUN: -thinlto-threads=1 \
+; RUN: -o tmp \
+; RUN: --whole-program-visibility-enabled-in-lto=true \
+; RUN: --validate-all-vtables-have-type-infos=true \
+; RUN: --all-vtables-have-type-infos=true \
+; RUN: -r=summary.o,_ZN4Base8dispatchEv,px \
+; RUN: -r=summary.o,_ZN8DerivedN5printEv,px \
+; RUN: -r=summary.o,_ZTV8DerivedN,p \
+; RUN: -r=summary.o,_ZTI8DerivedN,p \
+; RUN: -r=summary.o,_ZTS8DerivedN,p \
+; RUN: -r=summary.o,_ZTI7Derived,p \
+; RUN: 2>&1 | FileCheck --allow-empty %s --check-prefix=REMARK
+
+
+; Index based WPD
+; The callsite inside @_ZN4Base8dispatchEv remains indirect and not de-virtualized
+; when symbol resolution shows there isn't a prevailing definition of
+; `_ZTI7Derived` in the LTO unit.
+; RUN: llvm-lto2 run summary.o -save-temps -pass-remarks=. \
+; RUN: -thinlto-threads=1 \
+; RUN: -o tmp \
+; RUN: --whole-program-visibility-enabled-in-lto=true \
+; RUN: --validate-all-vtables-have-type-infos=true \
+; RUN: --all-vtables-have-type-infos=true \
+; RUN: -r=summary.o,_ZN4Base8dispatchEv,px \
+; RUN: -r=summary.o,_ZN8DerivedN5printEv,px \
+; RUN: -r=summary.o,_ZTV8DerivedN,p \
+; RUN: -r=summary.o,_ZTI8DerivedN,p \
+; RUN: -r=summary.o,_ZTS8DerivedN,p \
+; RUN: -r=summary.o,_ZTI7Derived, \
+; RUN: 2>&1 | FileCheck %s --allow-empty --implicit-check-not='single-impl: devirtualized a call to'
+
+; Repeat the above tests for WPD in hybrid LTO.
+; RUN: opt --thinlto-bc --thinlto-split-lto-unit -o hybrid.o %s
+
+; RUN: llvm-lto2 run hybrid.o -save-temps -pass-remarks=. \
+; RUN: -thinlto-threads=1 \
+; RUN: -o hybrid-tmp \
+; RUN: --whole-program-visibility-enabled-in-lto=true \
+; RUN: --validate-all-vtables-have-type-infos=true \
+; RUN: --all-vtables-have-type-infos=true \
+; RUN: -r=hybrid.o,_ZN4Base8dispatchEv,px \
+; RUN: -r=hybrid.o,_ZN8DerivedN5printEv,px \
+; RUN: -r=hybrid.o,_ZTV8DerivedN, \
+; RUN: -r=hybrid.o,_ZTI8DerivedN, \
+; RUN: -r=hybrid.o,_ZTS8DerivedN,p \
+; RUN: -r=hybrid.o,_ZTI7Derived,p \
+; RUN: -r=hybrid.o,_ZN8DerivedN5printEv, \
+; RUN: -r=hybrid.o,_ZTV8DerivedN,p \
+; RUN: -r=hybrid.o,_ZTI8DerivedN,p \
+; RUN: 2>&1 | FileCheck --allow-empty %s --check-prefix=REMARK
+
+
+; RUN: llvm-lto2 run hybrid.o -save-temps -pass-remarks=. \
+; RUN: -thinlto-threads=1 \
+; RUN: -o hybrid-tmp \
+; RUN: --whole-program-visibility-enabled-in-lto=true \
+; RUN: --validate-all-vtables-have-type-infos=true \
+; RUN: --all-vtables-have-type-infos=true \
+; RUN: -r=hybrid.o,_ZN4Base8dispatchEv,px \
+; RUN: -r=hybrid.o,_ZN8DerivedN5printEv,px \
+; RUN: -r=hybrid.o,_ZTV8DerivedN, \
+; RUN: -r=hybrid.o,_ZTI8DerivedN, \
+; RUN: -r=hybrid.o,_ZTS8DerivedN,p \
+; RUN: -r=hybrid.o,_ZTI7Derived, \
+; RUN: -r=hybrid.o,_ZN8DerivedN5printEv, \
+; RUN: -r=hybrid.o,_ZTV8DerivedN,p \
+; RUN: -r=hybrid.o,_ZTI8DerivedN,p \
+; RUN: 2>&1 | FileCheck --allow-empty %s --implicit-check-not='single-impl: devirtualized a call to'
+
+
+; Repeat the above tests for WPD in regular LTO.
+; RUN: opt -module-summary -o regular.o %s
+
+; RUN: llvm-lto2 run regular.o -save-temps -pass-remarks=. \
+; RUN: -thinlto-threads=1 \
+; RUN: -o regular-temp \
+; RUN: --whole-program-visibility-enabled-in-lto=true \
+; RUN: --validate-all-vtables-have-type-infos=true \
+; RUN: --all-vtables-have-type-infos=true \
+; RUN: -r=regular.o,_ZN4Base8dispatchEv,px \
+; RUN: -r=regular.o,_ZN8DerivedN5printEv,px \
+; RUN: -r=regular.o,_ZTS8DerivedN,p \
+; RUN: -r=regular.o,_ZTI7Derived,p \
+; RUN: -r=regular.o,_ZTV8DerivedN,p \
+; RUN: -r=regular.o,_ZTI8DerivedN,p \
+; RUN: 2>&1 | FileCheck --allow-empty %s --check-prefix=REMARK
+
+; RUN: llvm-lto2 run regular.o -save-temps -pass-remarks=. \
+; RUN: -thinlto-threads=1 \
+; RUN: -o regular-temp \
+; RUN: --whole-program-visibility-enabled-in-lto=true \
+; RUN: --validate-all-vtables-have-type-infos=true \
+; RUN: --all-vtables-have-type-infos=true \
+; RUN: -r=regular.o,_ZN4Base8dispatchEv,px \
+; RUN: -r=regular.o,_ZN8DerivedN5printEv,px \
+; RUN: -r=regular.o,_ZTS8DerivedN,p \
+; RUN: -r=regular.o,_ZTI7Derived, \
+; RUN: -r=regular.o,_ZTV8DerivedN,p \
+; RUN: -r=regular.o,_ZTI8DerivedN,p \
+; RUN: 2>&1 | FileCheck --allow-empty %s --implicit-check-not='single-impl: devirtualized a call to'
+
+; REMARK: single-impl: devirtualized a call to _ZN8DerivedN5printEv
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at _ZTV8DerivedN = linkonce_odr hidden constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI8DerivedN, ptr @_ZN8DerivedN5printEv] }, !type !0, !type !1, !type !2, !type !3, !vcall_visibility !4
+ at _ZTI8DerivedN = linkonce_odr hidden constant { ptr, ptr, ptr } { ptr null, ptr @_ZTS8DerivedN, ptr @_ZTI7Derived }
+ at _ZTS8DerivedN = linkonce_odr hidden constant [10 x i8] c"8DerivedN\00", align 1
+ at _ZTI7Derived = external constant ptr
+
+; Whole program devirtualization will ignore summaries that are not live.
+; Mark '_ZTV8DerivedN' as used so it remains live.
+ at llvm.used = appending global [1 x ptr] [ptr @_ZTV8DerivedN], section "llvm.metadata"
+
+define hidden void @_ZN4Base8dispatchEv(ptr %this) {
+entry:
+ %this.addr = alloca ptr
+ store ptr %this, ptr %this.addr
+ %this1 = load ptr, ptr %this.addr
+ %vtable = load ptr, ptr %this1
+ %0 = call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS7Derived")
+ call void @llvm.assume(i1 %0)
+ %vfn = getelementptr inbounds ptr, ptr %vtable, i64 0
+ %1 = load ptr, ptr %vfn
+ call void %1(ptr %this1)
+ ret void
+}
+
+define linkonce_odr hidden void @_ZN8DerivedN5printEv(ptr %this) #0 {
+entry:
+ ret void
+}
+
+attributes #0 = { noinline optnone }
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i64 16, !"_ZTS7Derived"}
+!1 = !{i64 16, !"_ZTSM7DerivedFvvE.virtual"}
+!2 = !{i64 16, !"_ZTS8DerivedN"}
+!3 = !{i64 16, !"_ZTSM8DerivedNFvvE.virtual"}
+!4 = !{i64 0}
diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index dbb7f2f3028aa..4c9b47d78a1bb 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -187,6 +187,18 @@ static cl::opt<bool> EnableFreestanding(
cl::desc("Enable Freestanding (disable builtins / TLI) during LTO"),
cl::Hidden);
+static cl::opt<bool> WholeProgramVisibilityEnabledInLTO(
+ "whole-program-visibility-enabled-in-lto",
+ cl::desc("Enable whole program visibility during LTO"), cl::Hidden);
+
+static cl::opt<bool> ValidateAllVtablesHaveTypeInfos(
+ "validate-all-vtables-have-type-infos",
+ cl::desc("Validate that all vtables have type infos in LTO"), cl::Hidden);
+
+static cl::opt<bool>
+ AllVtablesHaveTypeInfos("all-vtables-have-type-infos", cl::Hidden,
+ cl::desc("All vtables have type infos"));
+
extern cl::opt<cl::boolOrDefault> LoadBitcodeIntoNewDbgInfoFormat;
extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat;
@@ -332,6 +344,13 @@ static int run(int argc, char **argv) {
Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
+ if (WholeProgramVisibilityEnabledInLTO.getNumOccurrences() > 0)
+ Conf.HasWholeProgramVisibility = WholeProgramVisibilityEnabledInLTO;
+ if (ValidateAllVtablesHaveTypeInfos.getNumOccurrences() > 0)
+ Conf.ValidateAllVtablesHaveTypeInfos = ValidateAllVtablesHaveTypeInfos;
+ if (AllVtablesHaveTypeInfos.getNumOccurrences() > 0)
+ Conf.AllVtablesHaveTypeInfos = AllVtablesHaveTypeInfos;
+
ThinBackend Backend;
if (ThinLTODistributedIndexes)
Backend = createWriteIndexesThinBackend(llvm::hardware_concurrency(Threads),
More information about the llvm-commits
mailing list