[llvm] d99033e - [LTO][WPD] Suppress WPD on a class if the LTO unit doesn't have the prevailing definition of this class (#131721)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 19 22:11:01 PDT 2025


Author: Mingming Liu
Date: 2025-03-19T22:10:57-07:00
New Revision: d99033e4b4081bc47510c451b618ef93fe2f3c62

URL: https://github.com/llvm/llvm-project/commit/d99033e4b4081bc47510c451b618ef93fe2f3c62
DIFF: https://github.com/llvm/llvm-project/commit/d99033e4b4081bc47510c451b618ef93fe2f3c62.diff

LOG: [LTO][WPD] Suppress WPD on a class if the LTO unit doesn't have the prevailing definition of this class (#131721)

Before this patch, whole program devirtualization is suppressed on a
class if any superclass is visible to regular object files, by recording
the class GUID in `VisibleToRegularObjSymbols`.

This patch suppresses whole program devirtualization on a class if the
LTO unit doesn't have the prevailing definition of this class (e.g., the
prevailing definition is in a shared library)

Implementation summaries:
1. In llvm/lib/LTO/LTO.cpp, `IsVisibleToRegularObj` is updated to look
at the global resolution's `IsPrevailing` bit for ThinLTO and
regularLTO.
2. In llvm/tools/llvm-lto2/llvm-lto2.cpp, 
- three command line options are added so `llvm-lto2` can override
`Conf.HasWholeProgramVisibility`, `Conf.ValidateAllVtablesHaveTypeInfos`
and `Conf.AllVtablesHaveTypeInfos`.
    
The test case is reduced from a small C++ program (main.cc, lib.cc/h
pasted below in [1]). To reproduce the program failure without this
patch, compile lib.cc into a shared library, and provide it to a ThinLTO
build of main.cc (commands are pasted in [2]).

[1]

* lib.h

```
#include <cstdio>

class Derived {
 public:
  void dispatch();
  virtual void print();
  virtual void sum();
};

void Derived::dispatch() {
  static_cast<Derived*>(this)->print();
  static_cast<Derived*>(this)->sum();
}

void Derived::sum() {
  printf("Derived::sum\n");
}

__attribute__((noinline)) void* create(int i);
__attribute__((noinline)) void* getPtr(int i);

```

* lib.cc

```
#include "lib.h"
#include <cstdio>
#include <iostream>

class Derived2 : public Derived {
public:
  void print() override {
    printf("DerivedSharedLib\n");
  }
  void sum() override {
    printf("DerivedSharedLib::sum\n");
  }
};


void Derived::print() {
  printf("Derived\n");
}

__attribute__((noinline)) void* create(int i) {
  if (i & 1)
    return new Derived2();
  return new Derived();
}

```

* main.cc

```
cat main.cc
#include "lib.h"

class DerivedN : public Derived {
 public:
};

__attribute__((noinline)) void* getPtr(int x) {
  return new DerivedN();
}


int main() {
  Derived*b = static_cast<Derived*>(create(201));
  b->dispatch();
  delete b;

  Derived* a = static_cast<Derived*>(getPtr(202));
  a->dispatch();
  delete a;
  return 0;
}

```

[2]
```
# compile lib.o in a shared library.
$ ./bin/clang++ -O2 -fPIC -c lib.cc -o lib.o
$ ./bin/clang++ -shared -o libdata.so lib.o

# Provide the shared library in `-ldata`
$ ./bin/clang++ -v -g -ldata --save-temps -fno-discard-value-names -Wl,-mllvm,-print-before=wholeprogramdevirt -Wl,-mllvm,-wholeprogramdevirt-check=trap -Rpass=wholeprogramdevirt -Wl,--lto-whole-program-visibility -Wl,--lto-validate-all-vtables-have-type-infos -mllvm -disable-icp=true -Wl,-mllvm,-disable-icp=false -flto=thin -fwhole-program-vtables  -fno-split-lto-unit -fuse-ld=lld   main.cc  -L . -o main >/tmp/wholeprogramdevirt.ir 2>&1

# Run the program hits a segmentation fault with  `-Wl,-mllvm,-wholeprogramdevirt-check=trap`

$ LD_LIBRARY_PATH=. ./main
DerivedSharedLib
Trace/breakpoint trap (core dumped)

```

Added: 
    llvm/test/ThinLTO/X86/devirt_prevailing.ll

Modified: 
    llvm/lib/LTO/LTO.cpp
    llvm/tools/llvm-lto2/llvm-lto2.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index e895a46b8cd77..b12be4ac86aae 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1314,7 +1314,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
   // expected to be handled separately.
   auto IsVisibleToRegularObj = [&](StringRef name) {
     auto It = GlobalResolutions->find(name);
-    return (It == GlobalResolutions->end() || It->second.VisibleOutsideSummary);
+    return (It == GlobalResolutions->end() ||
+            It->second.VisibleOutsideSummary || !It->second.Prevailing);
   };
 
   // If allowed, upgrade public vcall visibility metadata to linkage unit
@@ -1905,7 +1906,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
     auto IsVisibleToRegularObj = [&](StringRef name) {
       auto It = GlobalResolutions->find(name);
       return (It == GlobalResolutions->end() ||
-              It->second.VisibleOutsideSummary);
+              It->second.VisibleOutsideSummary || !It->second.Prevailing);
     };
 
     getVisibleToRegularObjVtableGUIDs(ThinLTO.CombinedIndex,

diff  --git a/llvm/test/ThinLTO/X86/devirt_prevailing.ll b/llvm/test/ThinLTO/X86/devirt_prevailing.ll
new file mode 100644
index 0000000000000..497b8f8a8603e
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/devirt_prevailing.ll
@@ -0,0 +1,158 @@
+; RUN: rm -rf %t && mkdir %t && cd %t
+
+; Tests that devirtualization is suppressed on a class when the LTO unit doesn't
+; have the prevailing definition of the base class.
+
+; Generate unsplit module with summary for ThinLTO index-based WPD.
+; RUN: opt -thinlto-bc -o summary.o %s
+
+; Index based WPD
+; The callsite inside @_ZN4Base8dispatchEv gets devirtualized when symbol
+; resolution shows there is a prevailing definition of `_ZTI7Derived` in the
+; LTO unit.
+; RUN: llvm-lto2 run summary.o -save-temps -pass-remarks=. \
+; RUN:   -thinlto-threads=1 \
+; RUN:   -o tmp \
+; RUN:   --whole-program-visibility-enabled-in-lto=true \
+; RUN:   --validate-all-vtables-have-type-infos=true \
+; RUN:   --all-vtables-have-type-infos=true \
+; RUN:   -r=summary.o,_ZN4Base8dispatchEv,px \
+; RUN:   -r=summary.o,_ZN8DerivedN5printEv,px \
+; RUN:   -r=summary.o,_ZTV8DerivedN,p \
+; RUN:   -r=summary.o,_ZTI8DerivedN,p \
+; RUN:   -r=summary.o,_ZTS8DerivedN,p \
+; RUN:   -r=summary.o,_ZTI7Derived,p \
+; RUN:   2>&1 | FileCheck --allow-empty %s --check-prefix=REMARK
+
+
+; Index based WPD
+; The callsite inside @_ZN4Base8dispatchEv remains indirect and not de-virtualized
+; when symbol resolution shows there isn't a prevailing definition of
+; `_ZTI7Derived` in the LTO unit.
+; RUN: llvm-lto2  run summary.o -save-temps -pass-remarks=. \
+; RUN:   -thinlto-threads=1 \
+; RUN:   -o tmp \
+; RUN:   --whole-program-visibility-enabled-in-lto=true \
+; RUN:   --validate-all-vtables-have-type-infos=true \
+; RUN:   --all-vtables-have-type-infos=true \
+; RUN:   -r=summary.o,_ZN4Base8dispatchEv,px \
+; RUN:   -r=summary.o,_ZN8DerivedN5printEv,px \
+; RUN:   -r=summary.o,_ZTV8DerivedN,p \
+; RUN:   -r=summary.o,_ZTI8DerivedN,p \
+; RUN:   -r=summary.o,_ZTS8DerivedN,p \
+; RUN:   -r=summary.o,_ZTI7Derived, \
+; RUN:   2>&1 | FileCheck %s --allow-empty --implicit-check-not='single-impl: devirtualized a call to'
+
+; Repeat the above tests for WPD in hybrid LTO.
+; RUN: opt  --thinlto-bc --thinlto-split-lto-unit -o hybrid.o %s
+
+; RUN: llvm-lto2 run hybrid.o -save-temps -pass-remarks=. \
+; RUN:   -thinlto-threads=1 \
+; RUN:   -o hybrid-tmp \
+; RUN:   --whole-program-visibility-enabled-in-lto=true \
+; RUN:   --validate-all-vtables-have-type-infos=true \
+; RUN:   --all-vtables-have-type-infos=true \
+; RUN:   -r=hybrid.o,_ZN4Base8dispatchEv,px \
+; RUN:   -r=hybrid.o,_ZN8DerivedN5printEv,px \
+; RUN:   -r=hybrid.o,_ZTV8DerivedN, \
+; RUN:   -r=hybrid.o,_ZTI8DerivedN, \
+; RUN:   -r=hybrid.o,_ZTS8DerivedN,p \
+; RUN:   -r=hybrid.o,_ZTI7Derived,p \
+; RUN:   -r=hybrid.o,_ZN8DerivedN5printEv, \
+; RUN:   -r=hybrid.o,_ZTV8DerivedN,p \
+; RUN:   -r=hybrid.o,_ZTI8DerivedN,p \
+; RUN:   2>&1 | FileCheck --allow-empty %s --check-prefix=REMARK
+
+
+; RUN: llvm-lto2 run hybrid.o -save-temps -pass-remarks=. \
+; RUN:   -thinlto-threads=1 \
+; RUN:   -o hybrid-tmp \
+; RUN:   --whole-program-visibility-enabled-in-lto=true \
+; RUN:   --validate-all-vtables-have-type-infos=true \
+; RUN:   --all-vtables-have-type-infos=true \
+; RUN:   -r=hybrid.o,_ZN4Base8dispatchEv,px \
+; RUN:   -r=hybrid.o,_ZN8DerivedN5printEv,px \
+; RUN:   -r=hybrid.o,_ZTV8DerivedN, \
+; RUN:   -r=hybrid.o,_ZTI8DerivedN, \
+; RUN:   -r=hybrid.o,_ZTS8DerivedN,p \
+; RUN:   -r=hybrid.o,_ZTI7Derived, \
+; RUN:   -r=hybrid.o,_ZN8DerivedN5printEv, \
+; RUN:   -r=hybrid.o,_ZTV8DerivedN,p \
+; RUN:   -r=hybrid.o,_ZTI8DerivedN,p \
+; RUN:   2>&1 | FileCheck --allow-empty %s --implicit-check-not='single-impl: devirtualized a call to'
+
+
+; Repeat the above tests for WPD in regular LTO.
+; RUN: opt  -module-summary -o regular.o %s
+
+; RUN: llvm-lto2 run regular.o -save-temps -pass-remarks=. \
+; RUN:   -thinlto-threads=1 \
+; RUN:   -o regular-temp \
+; RUN:   --whole-program-visibility-enabled-in-lto=true \
+; RUN:   --validate-all-vtables-have-type-infos=true \
+; RUN:   --all-vtables-have-type-infos=true \
+; RUN:   -r=regular.o,_ZN4Base8dispatchEv,px \
+; RUN:   -r=regular.o,_ZN8DerivedN5printEv,px \
+; RUN:   -r=regular.o,_ZTS8DerivedN,p \
+; RUN:   -r=regular.o,_ZTI7Derived,p \
+; RUN:   -r=regular.o,_ZTV8DerivedN,p \
+; RUN:   -r=regular.o,_ZTI8DerivedN,p \
+; RUN:   2>&1 | FileCheck --allow-empty %s --check-prefix=REMARK
+
+; RUN: llvm-lto2 run regular.o -save-temps -pass-remarks=. \
+; RUN:   -thinlto-threads=1 \
+; RUN:   -o regular-temp \
+; RUN:   --whole-program-visibility-enabled-in-lto=true \
+; RUN:   --validate-all-vtables-have-type-infos=true \
+; RUN:   --all-vtables-have-type-infos=true \
+; RUN:   -r=regular.o,_ZN4Base8dispatchEv,px \
+; RUN:   -r=regular.o,_ZN8DerivedN5printEv,px \
+; RUN:   -r=regular.o,_ZTS8DerivedN,p \
+; RUN:   -r=regular.o,_ZTI7Derived, \
+; RUN:   -r=regular.o,_ZTV8DerivedN,p \
+; RUN:   -r=regular.o,_ZTI8DerivedN,p \
+; RUN:   2>&1 | FileCheck --allow-empty %s --implicit-check-not='single-impl: devirtualized a call to'
+
+; REMARK: single-impl: devirtualized a call to _ZN8DerivedN5printEv
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at _ZTV8DerivedN = linkonce_odr hidden constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI8DerivedN, ptr @_ZN8DerivedN5printEv] }, !type !0, !type !1, !type !2, !type !3, !vcall_visibility !4
+ at _ZTI8DerivedN = linkonce_odr hidden constant { ptr, ptr, ptr } { ptr null, ptr @_ZTS8DerivedN, ptr @_ZTI7Derived }
+ at _ZTS8DerivedN = linkonce_odr hidden constant [10 x i8] c"8DerivedN\00", align 1
+ at _ZTI7Derived = external constant ptr
+
+; Whole program devirtualization will ignore summaries that are not live.
+; Mark '_ZTV8DerivedN' as used so it remains live.
+ at llvm.used = appending global [1 x ptr] [ptr @_ZTV8DerivedN], section "llvm.metadata"
+
+define hidden void @_ZN4Base8dispatchEv(ptr %this) {
+entry:
+  %this.addr = alloca ptr
+  store ptr %this, ptr %this.addr
+  %this1 = load ptr, ptr %this.addr
+  %vtable = load ptr, ptr %this1
+  %0 = call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS7Derived")
+  call void @llvm.assume(i1 %0)
+  %vfn = getelementptr inbounds ptr, ptr %vtable, i64 0
+  %1 = load ptr, ptr %vfn
+  call void %1(ptr %this1)
+  ret void
+}
+
+define linkonce_odr hidden void @_ZN8DerivedN5printEv(ptr %this) #0 {
+entry:
+  ret void
+}
+
+attributes #0 = { noinline optnone }
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i64 16, !"_ZTS7Derived"}
+!1 = !{i64 16, !"_ZTSM7DerivedFvvE.virtual"}
+!2 = !{i64 16, !"_ZTS8DerivedN"}
+!3 = !{i64 16, !"_ZTSM8DerivedNFvvE.virtual"}
+!4 = !{i64 0}

diff  --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index dbb7f2f3028aa..4c9b47d78a1bb 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -187,6 +187,18 @@ static cl::opt<bool> EnableFreestanding(
     cl::desc("Enable Freestanding (disable builtins / TLI) during LTO"),
     cl::Hidden);
 
+static cl::opt<bool> WholeProgramVisibilityEnabledInLTO(
+    "whole-program-visibility-enabled-in-lto",
+    cl::desc("Enable whole program visibility during LTO"), cl::Hidden);
+
+static cl::opt<bool> ValidateAllVtablesHaveTypeInfos(
+    "validate-all-vtables-have-type-infos",
+    cl::desc("Validate that all vtables have type infos in LTO"), cl::Hidden);
+
+static cl::opt<bool>
+    AllVtablesHaveTypeInfos("all-vtables-have-type-infos", cl::Hidden,
+                            cl::desc("All vtables have type infos"));
+
 extern cl::opt<cl::boolOrDefault> LoadBitcodeIntoNewDbgInfoFormat;
 extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat;
 
@@ -332,6 +344,13 @@ static int run(int argc, char **argv) {
   Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
   Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
 
+  if (WholeProgramVisibilityEnabledInLTO.getNumOccurrences() > 0)
+    Conf.HasWholeProgramVisibility = WholeProgramVisibilityEnabledInLTO;
+  if (ValidateAllVtablesHaveTypeInfos.getNumOccurrences() > 0)
+    Conf.ValidateAllVtablesHaveTypeInfos = ValidateAllVtablesHaveTypeInfos;
+  if (AllVtablesHaveTypeInfos.getNumOccurrences() > 0)
+    Conf.AllVtablesHaveTypeInfos = AllVtablesHaveTypeInfos;
+
   ThinBackend Backend;
   if (ThinLTODistributedIndexes)
     Backend = createWriteIndexesThinBackend(llvm::hardware_concurrency(Threads),


        


More information about the llvm-commits mailing list