[clang] [llvm] [WPD]: Apply speculative WPD in non-lto mode. (PR #145031)
Hassnaa Hamdi via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 21:28:38 PDT 2025
https://github.com/hassnaaHamdi updated https://github.com/llvm/llvm-project/pull/145031
>From 54296ceadcdf59136f68c1e3132a2528396361d4 Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Fri, 20 Jun 2025 01:35:38 +0000
Subject: [PATCH 1/2] [WPD]: Apply speculative WPD in non-lto mode.
- This patch applies speculative devirtualization in non-LTO
mode, where visibility is not needed.
- It's still safe to devirtualize because we do speculation.
- In non-LTO mode, only speculative devirtualization is allowed,
without other features like virtual constant propagation, to
minimize the drawback of wrong speculation.
---
clang/docs/UsersManual.rst | 8 +-
clang/lib/CodeGen/BackendUtil.cpp | 1 +
clang/lib/CodeGen/CGVTables.cpp | 3 +-
clang/lib/Driver/ToolChains/Clang.cpp | 8 +-
clang/test/CodeGenCXX/devirt-single-impl.cpp | 56 ++++++++++++++
clang/test/Driver/whole-program-vtables.c | 10 +--
llvm/include/llvm/Passes/PassBuilder.h | 6 ++
.../llvm/Transforms/IPO/WholeProgramDevirt.h | 10 ++-
llvm/lib/Passes/PassBuilderPipelines.cpp | 18 +++++
.../lib/Transforms/IPO/WholeProgramDevirt.cpp | 74 +++++++++++++++----
10 files changed, 164 insertions(+), 30 deletions(-)
create mode 100644 clang/test/CodeGenCXX/devirt-single-impl.cpp
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 62844f7e6a2fa..a433a66e0b7a6 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2275,9 +2275,13 @@ are listed below.
.. option:: -fwhole-program-vtables
+ In LTO mode:
Enable whole-program vtable optimizations, such as single-implementation
devirtualization and virtual constant propagation, for classes with
- :doc:`hidden LTO visibility <LTOVisibility>`. Requires ``-flto``.
+ :doc:`hidden LTO visibility <LTOVisibility>`.
+ In non-LTO mode:
+ Enables only speculative devirtualization, without the other features.
+ Doesn't require ``-flto`` or visibility.
.. option:: -f[no]split-lto-unit
@@ -5170,7 +5174,7 @@ Execute ``clang-cl /?`` to see a list of supported options:
-fstandalone-debug Emit full debug info for all types used by the program
-fstrict-aliasing Enable optimizations based on strict aliasing rules
-fsyntax-only Run the preprocessor, parser and semantic analysis stages
- -fwhole-program-vtables Enables whole-program vtable optimization. Requires -flto
+ -fwhole-program-vtables Enables whole-program vtable optimization.
-gcodeview-ghash Emit type record hashes in a .debug$H section
-gcodeview Generate CodeView debug information
-gline-directives-only Emit debug line info directives only
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 7e0a3cf5591ce..f6963aadfbc69 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -902,6 +902,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
// non-integrated assemblers don't recognize .cgprofile section.
PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO;
+ PTO.WholeProgramDevirt = CodeGenOpts.WholeProgramVTables;
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index 2897ccdf88660..cfb78d623c7ec 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -1359,7 +1359,8 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
// Emit type metadata on vtables with LTO or IR instrumentation.
// In IR instrumentation, the type metadata is used to find out vtable
// definitions (for type profiling) among all global variables.
- if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr())
+ if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr() &&
+ !getCodeGenOpts().WholeProgramVTables)
return;
CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType());
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 8556bcadf0915..cc337ad334f65 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7847,8 +7847,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
IsDeviceOffloadAction ? D.getLTOMode() : D.getOffloadLTOMode();
auto OtherIsUsingLTO = OtherLTOMode != LTOK_None;
- if ((!IsUsingLTO && !OtherIsUsingLTO) ||
- (IsPS4 && !UnifiedLTO && (D.getLTOMode() != LTOK_Full)))
+ if (!IsUsingLTO && !OtherIsUsingLTO && !UnifiedLTO) {
+ if (const Arg *A = Args.getLastArg(options::OPT_O_Group))
+ if (!A->getOption().matches(options::OPT_O0))
+ CmdArgs.push_back("-fwhole-program-vtables");
+ } else if ((!IsUsingLTO && !OtherIsUsingLTO) ||
+ (IsPS4 && !UnifiedLTO && (D.getLTOMode() != LTOK_Full)))
D.Diag(diag::err_drv_argument_only_allowed_with)
<< "-fwhole-program-vtables"
<< ((IsPS4 && !UnifiedLTO) ? "-flto=full" : "-flto");
diff --git a/clang/test/CodeGenCXX/devirt-single-impl.cpp b/clang/test/CodeGenCXX/devirt-single-impl.cpp
new file mode 100644
index 0000000000000..6ba15cec1ce9b
--- /dev/null
+++ b/clang/test/CodeGenCXX/devirt-single-impl.cpp
@@ -0,0 +1,56 @@
+// Check that speculative devirtualization works without the need for LTO or visibility.
+// RUN: %clang_cc1 -fwhole-program-vtables -O1 %s -emit-llvm -o - | FileCheck %s
+
+struct A {
+ A(){}
+ __attribute__((noinline))
+ virtual int virtual1(){return 20;}
+ __attribute__((noinline))
+ virtual void empty_virtual(){}
+};
+
+struct B : A {
+ B(){}
+ __attribute__((noinline))
+ virtual int virtual1() override {return 50;}
+ __attribute__((noinline))
+ virtual void empty_virtual() override {}
+};
+
+// Test that we can apply speculative devirtualization
+// without the need for LTO or visibility.
+__attribute__((noinline))
+int test_devirtual(A *a) {
+ // CHECK: %0 = load ptr, ptr %vtable, align 8
+ // CHECK-NEXT: %1 = icmp eq ptr %0, @_ZN1B8virtual1Ev
+ // CHECK-NEXT: br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect, !prof !12
+
+ // CHECK: if.true.direct_targ: ; preds = %entry
+ // CHECK-NEXT: %2 = tail call noundef i32 @_ZN1B8virtual1Ev(ptr noundef nonnull align 8 dereferenceable(8) %a)
+ // CHECK-NEXT: br label %if.end.icp
+
+ // CHECK: if.false.orig_indirect: ; preds = %entry
+ // CHECK-NEXT: %call = tail call noundef i32 %0(ptr noundef nonnull align 8 dereferenceable(8) %a)
+ // CHECK-NEXT: br label %if.end.icp
+
+ // CHECK: if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ
+ // CHECK-NEXT: %3 = phi i32 [ %call, %if.false.orig_indirect ], [ %2, %if.true.direct_targ ]
+ // CHECK-NEXT: ret i32 %3
+
+ return a->virtual1();
+}
+
+// Test that we skip devirtualization for empty virtual functions as most probably
+// they are used for interfaces.
+__attribute__((noinline))
+void test_devirtual_empty_fn(A *a) {
+ // CHECK: load ptr, ptr %vfn, align 8
+ // CHECK-NEXT: tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %a)
+ a->empty_virtual();
+}
+
+void test() {
+ A *a = new B();
+ test_devirtual(a);
+ test_devirtual_empty_fn(a);
+}
\ No newline at end of file
diff --git a/clang/test/Driver/whole-program-vtables.c b/clang/test/Driver/whole-program-vtables.c
index 7f7c45e77f6f5..e0538b584f456 100644
--- a/clang/test/Driver/whole-program-vtables.c
+++ b/clang/test/Driver/whole-program-vtables.c
@@ -1,15 +1,11 @@
-// RUN: not %clang -target x86_64-unknown-linux -fwhole-program-vtables -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s
-// RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -### -- %s 2>&1 | FileCheck --check-prefix=NO-LTO %s
-// NO-LTO: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
+// RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -O1 -### %s 2>&1 | FileCheck --check-prefix=WPD-NO-LTO %s
+// RUN: %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -O1 -### -- %s 2>&1 | FileCheck --check-prefix=WPD-NO-LTO %s
+// WPD-NO-LTO: "-fwhole-program-vtables"
// RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -flto -### %s 2>&1 | FileCheck --check-prefix=LTO %s
// RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -flto -### -- %s 2>&1 | FileCheck --check-prefix=LTO %s
// LTO: "-fwhole-program-vtables"
-/// -funified-lto does not imply -flto, so we still get an error that fwhole-program-vtables has no effect without -flto
-// RUN: not %clang --target=x86_64-pc-linux-gnu -fwhole-program-vtables -funified-lto -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s
-// RUN: not %clang --target=x86_64-pc-linux-gnu -fwhole-program-vtables -fno-unified-lto -### %s 2>&1 | FileCheck --check-prefix=NO-LTO %s
-
// RUN: %clang -target x86_64-unknown-linux -fwhole-program-vtables -fno-whole-program-vtables -flto -### %s 2>&1 | FileCheck --check-prefix=LTO-DISABLE %s
// RUN: not %clang_cl --target=x86_64-pc-win32 -fwhole-program-vtables -fno-whole-program-vtables -flto -### -- %s 2>&1 | FileCheck --check-prefix=LTO-DISABLE %s
// LTO-DISABLE-NOT: "-fwhole-program-vtables"
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 51ccaa53447d7..ee08b11ce2c09 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -98,6 +98,12 @@ class PipelineTuningOptions {
// analyses after various module->function or cgscc->function adaptors in the
// default pipelines.
bool EagerlyInvalidateAnalyses;
+
+ /// Tuning option to enable/disable whole program devirtualization.
+ /// Its default value is false.
+ /// This is controlled by the `-fwhole-program-vtables` flag.
+ /// Used only in non-LTO mode.
+ bool WholeProgramDevirt;
};
/// This class provides access to building LLVM's passes.
diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
index 7a03405b4f462..fff27fae162a0 100644
--- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -226,11 +226,15 @@ struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> {
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
bool UseCommandLine = false;
+ const bool InLTOMode;
WholeProgramDevirtPass()
- : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {}
+ : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true),
+ InLTOMode(true) {}
WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary)
- : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
+ const ModuleSummaryIndex *ImportSummary,
+ bool InLTOMode = true)
+ : ExportSummary(ExportSummary), ImportSummary(ImportSummary),
+ InLTOMode(InLTOMode) {
assert(!(ExportSummary && ImportSummary));
}
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index a99146d5eaa34..4b10c63fd4e02 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -321,6 +321,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
MergeFunctions = EnableMergeFunctions;
InlinerThreshold = -1;
EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
+ WholeProgramDevirt = false;
}
namespace llvm {
@@ -1629,6 +1630,23 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
if (!LTOPreLink)
MPM.addPass(RelLookupTableConverterPass());
+ if (PTO.WholeProgramDevirt && LTOPhase == ThinOrFullLTOPhase::None) {
+ MPM.addPass(WholeProgramDevirtPass(/*ExportSummary*/ nullptr,
+ /*ImportSummary*/ nullptr,
+ /*InLTOMode=*/false));
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
+ lowertypetests::DropTestKind::Assume));
+ if (EnableModuleInliner) {
+ MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
+ UseInlineAdvisor,
+ ThinOrFullLTOPhase::None));
+ } else {
+ MPM.addPass(ModuleInlinerWrapperPass(
+ getInlineParamsFromOptLevel(Level),
+ /* MandatoryFirst */ true,
+ InlineContext{ThinOrFullLTOPhase::None, InlinePass::CGSCCInliner}));
+ }
+ }
return MPM;
}
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 30e1dc7167a39..0fe8a22eb5c0f 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -24,7 +24,8 @@
// returns 0, or a single vtable's function returns 1, replace each virtual
// call with a comparison of the vptr against that vtable's address.
//
-// This pass is intended to be used during the regular and thin LTO pipelines:
+// This pass is intended to be used during the regular/thinLTO and non-LTO
+// pipelines:
//
// During regular LTO, the pass determines the best optimization for each
// virtual call and applies the resolutions directly to virtual calls that are
@@ -48,6 +49,13 @@
// is supported.
// - Import phase: (same as with hybrid case above).
//
+// In non-LTO mode:
+// - The pass applies speculative devirtualization without requiring any type of
+// visibility.
+// - Skips other features like virtual constant propagation, uniform return
+// value
+// optimization, unique return value optimization, branch funnels to minimize
+// the drawbacks of wrong speculation.
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -60,7 +68,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
@@ -798,6 +808,21 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
+ std::optional<ModuleSummaryIndex> Index;
+ // Force Fallback mode as it's safe in case it's non-LTO mode where
+ // we don't have hidden visibility.
+ if (!InLTOMode) {
+ DevirtCheckMode = WPDCheckMode::Fallback;
+ // In non-LTO mode, we don't have an ExportSummary, so we
+ // build the ExportSummary from the module.
+ assert(!ExportSummary &&
+ "ExportSummary is expected to be empty in non-LTO mode");
+ if (DevirtCheckMode == WPDCheckMode::Fallback && !ExportSummary) {
+ ProfileSummaryInfo PSI(M);
+ Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI));
+ ExportSummary = Index.has_value() ? &Index.value() : nullptr;
+ }
+ }
if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
ImportSummary)
.run())
@@ -1091,10 +1116,12 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (!TM.Bits->GV->isConstant())
return false;
- // We cannot perform whole program devirtualization analysis on a vtable
- // with public LTO visibility.
- if (TM.Bits->GV->getVCallVisibility() ==
- GlobalObject::VCallVisibilityPublic)
+ // If speculative devirtualization is NOT enabled, it's not safe to perform
+ // whole program devirtualization
+ // analysis on a vtable with public LTO visibility.
+ if (DevirtCheckMode != WPDCheckMode::Fallback &&
+ TM.Bits->GV->getVCallVisibility() ==
+ GlobalObject::VCallVisibilityPublic)
return false;
Function *Fn = nullptr;
@@ -1112,6 +1139,11 @@ bool DevirtModule::tryFindVirtualCallTargets(
// calls to pure virtuals are UB.
if (Fn->getName() == "__cxa_pure_virtual")
continue;
+ // In most cases empty functions will be overridden by the
+ // implementation of the derived class, so we can skip them.
+ if (DevirtCheckMode == WPDCheckMode::Fallback &&
+ Fn->getReturnType()->isVoidTy() && Fn->getInstructionCount() <= 1)
+ continue;
// We can disregard unreachable functions as possible call targets, as
// unreachable functions shouldn't be called.
@@ -1333,10 +1365,11 @@ bool DevirtModule::trySingleImplDevirt(
if (!IsExported)
return false;
- // If the only implementation has local linkage, we must promote to external
- // to make it visible to thin LTO objects. We can only get here during the
- // ThinLTO export phase.
- if (TheFn->hasLocalLinkage()) {
+ // In case of non-speculative devirtualization, if the only implementation has
+ // local linkage, we must promote to external
+ // to make it visible to thin LTO objects. We can only get here during the
+ // ThinLTO export phase.
+ if (DevirtCheckMode != WPDCheckMode::Fallback && TheFn->hasLocalLinkage()) {
std::string NewName = (TheFn->getName() + ".llvm.merged").str();
// Since we are renaming the function, any comdats with the same name must
@@ -2315,6 +2348,11 @@ bool DevirtModule::run() {
Function *TypeTestFunc =
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
+ // If we are applying speculative devirtualization, we can work on the public
+ // type test intrinsics.
+ if (!TypeTestFunc && DevirtCheckMode == WPDCheckMode::Fallback)
+ TypeTestFunc =
+ Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
Function *TypeCheckedLoadFunc =
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
@@ -2437,12 +2475,18 @@ bool DevirtModule::run() {
.WPDRes[S.first.ByteOffset];
if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
S.first.ByteOffset, ExportSummary)) {
-
- if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
- DidVirtualConstProp |=
- tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
-
- tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
+ trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
+ // In Speculative devirt mode, we skip virtual constant propagation
+ // and branch funneling to minimize the drawback if we got wrong
+ // speculation during devirtualization.
+ if (DevirtCheckMode != WPDCheckMode::Fallback) {
+ if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second,
+ Res)) {
+ DidVirtualConstProp |=
+ tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
+
+ tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
+ }
}
// Collect functions devirtualized at least for one call site for stats.
>From 77d16b271f6c6ac7097cc174043fb14d81a09231 Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Sat, 12 Jul 2025 04:19:47 +0000
Subject: [PATCH 2/2] Resolve review comments
Change-Id: I3f6210cd7a088ae7186594d62b2d4f056fc5e732
---
clang/test/CodeGenCXX/devirt-single-impl.cpp | 56 --------
clang/test/CodeGenCXX/type-metadata.cpp | 8 ++
.../lib/Transforms/IPO/WholeProgramDevirt.cpp | 25 ++--
.../devirt-single-impl-nolto.ll | 130 ++++++++++++++++++
.../virtual-const-prop-check.ll | 7 +
5 files changed, 162 insertions(+), 64 deletions(-)
delete mode 100644 clang/test/CodeGenCXX/devirt-single-impl.cpp
create mode 100644 llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-nolto.ll
diff --git a/clang/test/CodeGenCXX/devirt-single-impl.cpp b/clang/test/CodeGenCXX/devirt-single-impl.cpp
deleted file mode 100644
index 6ba15cec1ce9b..0000000000000
--- a/clang/test/CodeGenCXX/devirt-single-impl.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-// Check that speculative devirtualization works without the need for LTO or visibility.
-// RUN: %clang_cc1 -fwhole-program-vtables -O1 %s -emit-llvm -o - | FileCheck %s
-
-struct A {
- A(){}
- __attribute__((noinline))
- virtual int virtual1(){return 20;}
- __attribute__((noinline))
- virtual void empty_virtual(){}
-};
-
-struct B : A {
- B(){}
- __attribute__((noinline))
- virtual int virtual1() override {return 50;}
- __attribute__((noinline))
- virtual void empty_virtual() override {}
-};
-
-// Test that we can apply speculative devirtualization
-// without the need for LTO or visibility.
-__attribute__((noinline))
-int test_devirtual(A *a) {
- // CHECK: %0 = load ptr, ptr %vtable, align 8
- // CHECK-NEXT: %1 = icmp eq ptr %0, @_ZN1B8virtual1Ev
- // CHECK-NEXT: br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect, !prof !12
-
- // CHECK: if.true.direct_targ: ; preds = %entry
- // CHECK-NEXT: %2 = tail call noundef i32 @_ZN1B8virtual1Ev(ptr noundef nonnull align 8 dereferenceable(8) %a)
- // CHECK-NEXT: br label %if.end.icp
-
- // CHECK: if.false.orig_indirect: ; preds = %entry
- // CHECK-NEXT: %call = tail call noundef i32 %0(ptr noundef nonnull align 8 dereferenceable(8) %a)
- // CHECK-NEXT: br label %if.end.icp
-
- // CHECK: if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ
- // CHECK-NEXT: %3 = phi i32 [ %call, %if.false.orig_indirect ], [ %2, %if.true.direct_targ ]
- // CHECK-NEXT: ret i32 %3
-
- return a->virtual1();
-}
-
-// Test that we skip devirtualization for empty virtual functions as most probably
-// they are used for interfaces.
-__attribute__((noinline))
-void test_devirtual_empty_fn(A *a) {
- // CHECK: load ptr, ptr %vfn, align 8
- // CHECK-NEXT: tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %a)
- a->empty_virtual();
-}
-
-void test() {
- A *a = new B();
- test_devirtual(a);
- test_devirtual_empty_fn(a);
-}
\ No newline at end of file
diff --git a/clang/test/CodeGenCXX/type-metadata.cpp b/clang/test/CodeGenCXX/type-metadata.cpp
index 1cb2fed8db3e6..faf0950cae936 100644
--- a/clang/test/CodeGenCXX/type-metadata.cpp
+++ b/clang/test/CodeGenCXX/type-metadata.cpp
@@ -14,6 +14,9 @@
// RUN: %clang_cc1 -O2 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=ITANIUM-OPT --check-prefix=ITANIUM-OPT-LAYOUT %s
// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TT-MS %s
+// Test for the whole-program-vtables feature in nonlto mode:
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO %s
+
// Tests for cfi + whole-program-vtables:
// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=ITANIUM-HIDDEN --check-prefix=ITANIUM-COMMON-MD --check-prefix=TC-ITANIUM --check-prefix=ITANIUM-NO-RV-MD %s
// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TC-MS %s
@@ -178,6 +181,7 @@ void af(A *a) {
// TT-ITANIUM-HIDDEN: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A")
// TT-ITANIUM-DEFAULT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A")
// TT-MS: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"?AUA@@")
+ // TT-ITANIUM-DEFAULT-NOLTO: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A")
// TC-ITANIUM: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A")
// TC-ITANIUM-RV: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A")
// TC-MS: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@")
@@ -212,6 +216,7 @@ void df1(D *d) {
// TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]])
// TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]])
// TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@@")
+ // TT-ITANIUM-DEFAULT-NOLTO: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]])
// TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]])
// TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]])
// TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@")
@@ -224,6 +229,7 @@ void dg1(D *d) {
// TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B")
// TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B")
// TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUB@@")
+ // TT-ITANIUM-DEFAULT-NOLTO: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B")
// TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTS1B")
// TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTS1B")
// TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUB@@")
@@ -236,6 +242,7 @@ void dh1(D *d) {
// TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]])
// TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]])
// TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]])
+ // TT-ITANIUM-DEFAULT-NOLTO: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]])
// TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 16, metadata ![[DTYPE]])
// TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE]])
// TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE:[0-9]+]])
@@ -297,6 +304,7 @@ void f(D *d) {
// TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE")
// TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE")
 // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@test2@@")
+ // TT-ITANIUM-DEFAULT-NOLTO: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE")
// TC-ITANIUM: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTSN5test21DE")
// TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTSN5test21DE")
 // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@test2@@")
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 0fe8a22eb5c0f..0fae02a3e89e1 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -221,6 +221,14 @@ static cl::opt<WPDCheckMode> DevirtCheckMode(
clEnumValN(WPDCheckMode::Fallback, "fallback",
"Fallback to indirect when incorrect")));
+// This pass runs mainly in LTO mode, but it can also run in non-LTO mode with
+// limited features. For testing, provide a way to tell that we are running in
+// non-LTO mode.
+static cl::opt<bool>
+ TestNoLTOMode("wholeprogramdevirt-nolto", cl::Hidden,
+ cl::desc("Run whole program devirt outside LTO mode."),
+ cl::init(false));
+
namespace {
struct PatternList {
std::vector<GlobPattern> Patterns;
@@ -804,6 +812,9 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
return FAM.getResult<DominatorTreeAnalysis>(F);
};
if (UseCommandLine) {
+ if (TestNoLTOMode)
+ // We are outside LTO mode, so enable speculative devirtualization:
+ DevirtCheckMode = WPDCheckMode::Fallback;
if (!DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -2475,18 +2486,16 @@ bool DevirtModule::run() {
.WPDRes[S.first.ByteOffset];
if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
S.first.ByteOffset, ExportSummary)) {
- trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
+ bool SingleImplDevirt =
+ trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
// In Speculative devirt mode, we skip virtual constant propagation
// and branch funneling to minimize the drawback if we got wrong
// speculation during devirtualization.
- if (DevirtCheckMode != WPDCheckMode::Fallback) {
- if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second,
- Res)) {
- DidVirtualConstProp |=
- tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
+ if (!SingleImplDevirt && DevirtCheckMode != WPDCheckMode::Fallback) {
+ DidVirtualConstProp |=
+ tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
- tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
- }
+ tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
}
// Collect functions devirtualized at least for one call site for stats.
diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-nolto.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-nolto.ll
new file mode 100644
index 0000000000000..82d0be85ffc71
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-nolto.ll
@@ -0,0 +1,130 @@
+; -stats requires asserts
+; REQUIRES: asserts
+
+; Check that we can still devirtualize outside LTO mode
+; Check that we skip devirtualization for empty functions outside LTO mode
+
+; RUN: opt -S -passes=wholeprogramdevirt -wholeprogramdevirt-nolto -pass-remarks=wholeprogramdevirt -stats %s 2>&1 | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: remark: devirt-single.cc:30:32: single-impl: devirtualized a call to vf
+; CHECK: remark: devirt-single.cc:41:32: single-impl: devirtualized a call to vf
+; CHECK: remark: devirt-single.cc:51:32: single-impl: devirtualized a call to vf
+; CHECK: remark: devirt-single.cc:13:0: devirtualized vf
+; CHECK-NOT: devirtualized
+
+ at vt1 = constant [1 x ptr] [ptr @vf], !type !8
+ at vt2 = constant [1 x ptr] [ptr @vf_empty], !type !12
+
+define i1 @vf(ptr %this) #0 !dbg !7 {
+ ret i1 true
+}
+
+; This should NOT be devirtualized because in non-LTO mode empty functions
+; are skipped.
+define void @vf_empty(ptr %this) !dbg !11 {
+ ret void
+}
+
+; CHECK: define void @call
+define void @call(ptr %obj) #1 !dbg !5 {
+ %vtable = load ptr, ptr %obj
+ %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptr = load ptr, ptr %vtable
+ ; CHECK: if.true.direct_targ:
+ ; CHECK: call i1 @vf(
+ ; CHECK: if.false.orig_indirect:
+ ; CHECK: call i1 %fptr(
+ call i1 %fptr(ptr %obj), !dbg !6
+ ret void
+}
+
+
+; CHECK: define void @call1
+define void @call1(ptr %obj) #1 !dbg !9 {
+ %vtable = load ptr, ptr %obj
+ %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1")
+ call void @llvm.assume(i1 %p)
+ %fptr = load ptr, ptr %vtable, align 8
+ ; CHECK: call i1 %fptr
+ %1 = call i1 %fptr(ptr %obj), !dbg !10
+ ret void
+}
+declare ptr @llvm.load.relative.i32(ptr, i32)
+
+ at vt3 = private unnamed_addr constant [1 x i32] [
+ i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32)
+], align 4, !type !15
+
+; CHECK: define void @call2
+define void @call2(ptr %obj) #1 !dbg !13 {
+ %vtable = load ptr, ptr %obj
+ %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2")
+ call void @llvm.assume(i1 %p)
+ %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0)
+ ; CHECK: if.true.direct_targ:
+ ; CHECK: call i1 @vf(
+ ; CHECK: if.false.orig_indirect:
+ ; CHECK: call i1 %fptr(
+ call i1 %fptr(ptr %obj), !dbg !14
+ ret void
+}
+
+ at _ZTV1A.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [
+ i32 0, ; offset to top
+ i32 0, ; rtti
+ i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32) ; func offset
+] }, align 4, !type !18
+
+; CHECK: define void @call3
+define void @call3(ptr %obj) #1 !dbg !16 {
+ %vtable = load ptr, ptr %obj
+ %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3")
+ call void @llvm.assume(i1 %p)
+ %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 8)
+ ; CHECK: if.true.direct_targ:
+ ; CHECK: call i1 @vf(
+ ; CHECK: if.false.orig_indirect:
+ ; CHECK: call i1 %fptr(
+ call i1 %fptr(ptr %obj), !dbg !17
+ ret void
+}
+
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "devirt-single.cc", directory: ".")
+!2 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{!"clang version 4.0.0 (trunk 278098)"}
+!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!6 = !DILocation(line: 30, column: 32, scope: !5)
+!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEv", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!8 = !{i32 0, !"typeid"}
+
+!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: !1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!10 = !DILocation(line: 35, column: 32, scope: !9)
+!11 = distinct !DISubprogram(name: "vf_empty", linkageName: "_ZN3vt18vf_emptyEv", scope: !1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!12 = !{i32 0, !"typeid1"}
+
+!13 = distinct !DISubprogram(name: "call2", linkageName: "_Z5call2Pv", scope: !1, file: !1, line: 40, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!14 = !DILocation(line: 41, column: 32, scope: !13)
+!15 = !{i32 0, !"typeid2"}
+
+!16 = distinct !DISubprogram(name: "call3", linkageName: "_Z5call3Pv", scope: !1, file: !1, line: 50, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!17 = !DILocation(line: 51, column: 32, scope: !16)
+!18 = !{i32 0, !"typeid3"}
+
+
+
+; CHECK: 1 wholeprogramdevirt - Number of whole program devirtualization targets
+; CHECK: 3 wholeprogramdevirt - Number of single implementation devirtualizations
diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
index d8f5c912e9a50..f2cf45bb21808 100644
--- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
+++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
@@ -11,6 +11,9 @@
; Check wildcard
; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt -wholeprogramdevirt-skip=vf?i1 %s 2>&1 | FileCheck %s --check-prefix=SKIP
+; Check that no stats are reported when we enable devirtualization out of LTO mode.
+; RUN: opt -S -passes=wholeprogramdevirt -wholeprogramdevirt-nolto -stats %s 2>&1 | FileCheck %s --check-prefix=CHECK-WPD-NOLTO
+
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"
@@ -225,3 +228,7 @@ declare ptr @llvm.load.relative.i32(ptr, i32)
; CHECK: 2 wholeprogramdevirt - Number of unique return value optimizations
; CHECK: 2 wholeprogramdevirt - Number of virtual constant propagations
; CHECK: 2 wholeprogramdevirt - Number of 1 bit virtual constant propagations
+
+; CHECK-WPD-NOLTO-NOT: 0 wholeprogramdevirt - Number of unique return value optimizations
+; CHECK-WPD-NOLTO-NOT: 0 wholeprogramdevirt - Number of virtual constant propagations
+; CHECK-WPD-NOLTO-NOT: 0 wholeprogramdevirt - Number of 1 bit virtual constant propagations
More information about the llvm-commits
mailing list