[llvm] [WPD]: Enable speculative devirtualizatoin. (PR #159048)
Hassnaa Hamdi via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 1 17:07:19 PDT 2025
https://github.com/hassnaaHamdi updated https://github.com/llvm/llvm-project/pull/159048
>From 0b5d559f7d2ad8de2c12787904b5f09d5682006e Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Mon, 15 Sep 2025 23:41:28 +0000
Subject: [PATCH 1/3] [WPD]: Enable speculative devirtualizatoin.
- Add cl::opt 'devirtualize-speculatively' to enable it.
- Flag is disabled by default.
- It works regardless of the visibility of the object.
---
.../lib/Transforms/IPO/WholeProgramDevirt.cpp | 64 +++++++--
.../speculative-devirt-single-impl.ll | 131 ++++++++++++++++++
.../virtual-const-prop-check.ll | 7 +
3 files changed, 187 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 09bffa7bf5846..64f574be8fd0e 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -24,7 +24,8 @@
// returns 0, or a single vtable's function returns 1, replace each virtual
// call with a comparison of the vptr against that vtable's address.
//
-// This pass is intended to be used during the regular and thin LTO pipelines:
+// This pass is intended to be used during the regular/thin and non-LTO
+// pipelines:
//
// During regular LTO, the pass determines the best optimization for each
// virtual call and applies the resolutions directly to virtual calls that are
@@ -48,6 +49,14 @@
// is supported.
// - Import phase: (same as with hybrid case above).
//
+// During Speculative devirtualization mode -not restricted to LTO-:
+// - The pass applies speculative devirtualization without requiring any type of
+// visibility.
+// - Skips other features like virtual constant propagation, uniform return
+// value optimization, unique return value optimization and branch funnels as
+// they need LTO.
+// - This mode is enabled via 'devirtualize-speculatively' flag.
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -61,7 +70,9 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
@@ -120,6 +131,13 @@ STATISTIC(NumVirtConstProp1Bit,
"Number of 1 bit virtual constant propagations");
STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
+// TODO: This option eventually should support any public visibility vtables
+// with/out LTO.
+static cl::opt<bool> ClDevirtualizeSpeculatively(
+ "devirtualize-speculatively",
+ cl::desc("Enable speculative devirtualization optimization"),
+ cl::init(false));
+
static cl::opt<PassSummaryAction> ClSummaryAction(
"wholeprogramdevirt-summary-action",
cl::desc("What to do with the summary when running this pass"),
@@ -1083,10 +1101,10 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (!TM.Bits->GV->isConstant())
return false;
- // We cannot perform whole program devirtualization analysis on a vtable
- // with public LTO visibility.
- if (TM.Bits->GV->getVCallVisibility() ==
- GlobalObject::VCallVisibilityPublic)
+ // Without ClDevirtualizeSpeculatively, we cannot perform whole program
+ // devirtualization analysis on a vtable with public LTO visibility.
+ if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() ==
+ GlobalObject::VCallVisibilityPublic)
return false;
Function *Fn = nullptr;
@@ -1105,6 +1123,12 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (Fn->getName() == "__cxa_pure_virtual")
continue;
+ // In most cases empty functions will be overridden by the
+ // implementation of the derived class, so we can skip them.
+ if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() &&
+ Fn->getInstructionCount() <= 1)
+ continue;
+
// We can disregard unreachable functions as possible call targets, as
// unreachable functions shouldn't be called.
if (mustBeUnreachableFunction(Fn, ExportSummary))
@@ -1223,10 +1247,12 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
CallTrap->setDebugLoc(CB.getDebugLoc());
}
- // If fallback checking is enabled, add support to compare the virtual
- // function pointer to the devirtualized target. In case of a mismatch,
- // fall back to indirect call.
- if (DevirtCheckMode == WPDCheckMode::Fallback) {
+ // If fallback checking or speculative devirtualization are enabled,
+ // add support to compare the virtual function pointer to the
+ // devirtualized target. In case of a mismatch, fall back to indirect
+ // call.
+ if (DevirtCheckMode == WPDCheckMode::Fallback ||
+ ClDevirtualizeSpeculatively) {
MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
// Version the indirect call site. If the called value is equal to the
// given callee, 'NewInst' will be executed, otherwise the original call
@@ -1325,10 +1351,10 @@ bool DevirtModule::trySingleImplDevirt(
if (!IsExported)
return false;
- // If the only implementation has local linkage, we must promote to external
- // to make it visible to thin LTO objects. We can only get here during the
- // ThinLTO export phase.
- if (TheFn->hasLocalLinkage()) {
+ // Out of speculative devirtualization mode, if the only implementation has
+ // local linkage, we must promote to external to make it visible to thin LTO
+ // objects.
+ if (!ClDevirtualizeSpeculatively && TheFn->hasLocalLinkage()) {
std::string NewName = (TheFn->getName() + ".llvm.merged").str();
// Since we are renaming the function, any comdats with the same name must
@@ -2350,6 +2376,11 @@ bool DevirtModule::run() {
Function *TypeTestFunc =
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
+ // If we are in speculative devirtualization mode, we can work on the public
+ // type test intrinsics.
+ if (!TypeTestFunc && ClDevirtualizeSpeculatively)
+ TypeTestFunc =
+ Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
Function *TypeCheckedLoadFunc =
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
@@ -2472,8 +2503,11 @@ bool DevirtModule::run() {
.WPDRes[S.first.ByteOffset];
if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
S.first.ByteOffset, ExportSummary)) {
-
- if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
+ bool SingleImplDevirt =
+ trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
+ // Out of speculative devirtualization mode, Try to apply virtual constant
+ // propagation or branch funneling.
+ if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) {
DidVirtualConstProp |=
tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
diff --git a/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll
new file mode 100644
index 0000000000000..b7d1bda8d133b
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll
@@ -0,0 +1,131 @@
+; -stats requires asserts
+; REQUIRES: asserts
+
+; Check that we can still devirtualize outside LTO mode when speculative devirtualization is enabled.
+; Check that we skip devirtualization for empty functions in speculative devirtualization mode
+
+; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively \
+; RUN: -pass-remarks=wholeprogramdevirt -stats %s 2>&1 | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: remark: devirt-single.cc:30:32: single-impl: devirtualized a call to vf
+; CHECK: remark: devirt-single.cc:41:32: single-impl: devirtualized a call to vf
+; CHECK: remark: devirt-single.cc:51:32: single-impl: devirtualized a call to vf
+; CHECK: remark: devirt-single.cc:13:0: devirtualized vf
+; CHECK-NOT: devirtualized
+
+ at vt1 = constant [1 x ptr] [ptr @vf], !type !8
+ at vt2 = constant [1 x ptr] [ptr @vf_empty], !type !12
+
+define i1 @vf(ptr %this) #0 !dbg !7 {
+ ret i1 true
+}
+
+; This should NOT be devirtualized because during non-lto empty functions
+; are skipped.
+define void @vf_empty(ptr %this) !dbg !11 {
+ ret void
+}
+
+; CHECK: define void @call
+define void @call(ptr %obj) #1 !dbg !5 {
+ %vtable = load ptr, ptr %obj
+ %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptr = load ptr, ptr %vtable
+ ; CHECK: if.true.direct_targ:
+ ; CHECK: call i1 @vf(
+ ; CHECK: if.false.orig_indirect:
+ ; CHECK: call i1 %fptr(
+ call i1 %fptr(ptr %obj), !dbg !6
+ ret void
+}
+
+
+; CHECK: define void @call1
+define void @call1(ptr %obj) #1 !dbg !9 {
+ %vtable = load ptr, ptr %obj
+ %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1")
+ call void @llvm.assume(i1 %p)
+ %fptr = load ptr, ptr %vtable, align 8
+ ; CHECK: call i1 %fptr
+ %1 = call i1 %fptr(ptr %obj), !dbg !10
+ ret void
+}
+declare ptr @llvm.load.relative.i32(ptr, i32)
+
+ at vt3 = private unnamed_addr constant [1 x i32] [
+ i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32)
+], align 4, !type !15
+
+; CHECK: define void @call2
+define void @call2(ptr %obj) #1 !dbg !13 {
+ %vtable = load ptr, ptr %obj
+ %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2")
+ call void @llvm.assume(i1 %p)
+ %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0)
+ ; CHECK: if.true.direct_targ:
+ ; CHECK: call i1 @vf(
+ ; CHECK: if.false.orig_indirect:
+ ; CHECK: call i1 %fptr(
+ call i1 %fptr(ptr %obj), !dbg !14
+ ret void
+}
+
+ at _ZTV1A.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [
+ i32 0, ; offset to top
+ i32 0, ; rtti
+ i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32) ; vf_emptyunc offset
+] }, align 4, !type !18
+
+; CHECK: define void @call3
+define void @call3(ptr %obj) #1 !dbg !16 {
+ %vtable = load ptr, ptr %obj
+ %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3")
+ call void @llvm.assume(i1 %p)
+ %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 8)
+ ; CHECK: if.true.direct_targ:
+ ; CHECK: call i1 @vf(
+ ; CHECK: if.false.orig_indirect:
+ ; CHECK: call i1 %fptr(
+ call i1 %fptr(ptr %obj), !dbg !17
+ ret void
+}
+
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "devirt-single.cc", directory: ".")
+!2 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{!"clang version 4.0.0 (trunk 278098)"}
+!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!6 = !DILocation(line: 30, column: 32, scope: !5)
+!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEv", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!8 = !{i32 0, !"typeid"}
+
+!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: !1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!10 = !DILocation(line: 35, column: 32, scope: !9)
+!11 = distinct !DISubprogram(name: "vf_empty", linkageName: "_ZN3vt18vf_emptyEv", scope: !1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!12 = !{i32 0, !"typeid1"}
+
+!13 = distinct !DISubprogram(name: "call2", linkageName: "_Z5call2Pv", scope: !1, file: !1, line: 40, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!14 = !DILocation(line: 41, column: 32, scope: !13)
+!15 = !{i32 0, !"typeid2"}
+
+!16 = distinct !DISubprogram(name: "call3", linkageName: "_Z5call3Pv", scope: !1, file: !1, line: 50, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!17 = !DILocation(line: 51, column: 32, scope: !16)
+!18 = !{i32 0, !"typeid3"}
+
+
+
+; CHECK: 1 wholeprogramdevirt - Number of whole program devirtualization targets
+; CHECK: 3 wholeprogramdevirt - Number of single implementation devirtualizations
diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
index d8f5c912e9a50..8327e1cfdf1d2 100644
--- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
+++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
@@ -11,6 +11,9 @@
; Check wildcard
; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt -wholeprogramdevirt-skip=vf?i1 %s 2>&1 | FileCheck %s --check-prefix=SKIP
+; Check that no stats are reported in speculative devirtualization mode as the virtual const prop is disabled.
+; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively -stats %s 2>&1 | FileCheck %s --check-prefix=CHECK-SPECULATIVE-WPD
+
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"
@@ -225,3 +228,7 @@ declare ptr @llvm.load.relative.i32(ptr, i32)
; CHECK: 2 wholeprogramdevirt - Number of unique return value optimizations
; CHECK: 2 wholeprogramdevirt - Number of virtual constant propagations
; CHECK: 2 wholeprogramdevirt - Number of 1 bit virtual constant propagations
+
+; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of unique return value optimizations
+; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of virtual constant propagations
+; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of 1 bit virtual constant propagations
>From d066574fd5eb22320b5fcf544f6edded02f4e6c1 Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Sun, 28 Sep 2025 13:40:44 +0000
Subject: [PATCH 2/3] resolve review comments:
- Drop commit where we were building ExportSummary as it can't be
tested in this patch.
- Consider the case where we have public/non-public type tests
- Use LTO flag to differentiate between LTO mode and non-LTO mode.
Change-Id: I52acd6e7352fe1075c9f990aa78dfdc35b89f124
---
.../llvm/Transforms/IPO/WholeProgramDevirt.h | 12 +++--
.../lib/Transforms/IPO/WholeProgramDevirt.cpp | 45 +++++++++++--------
.../speculative-devirt-single-impl.ll | 3 +-
3 files changed, 38 insertions(+), 22 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
index 7a03405b4f462..b39ef684761a9 100644
--- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -226,11 +226,17 @@ struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> {
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
bool UseCommandLine = false;
+ // Default value for LTO mode is true, as this is the default behavior for
+ // whole program devirtualization unless explicitly disabled.
+ const bool InLTOMode;
WholeProgramDevirtPass()
- : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {}
+ : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true),
+ InLTOMode(true) {}
WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary)
- : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
+ const ModuleSummaryIndex *ImportSummary,
+ bool InLTOMode = true)
+ : ExportSummary(ExportSummary), ImportSummary(ImportSummary),
+ InLTOMode(InLTOMode) {
assert(!(ExportSummary && ImportSummary));
}
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 64f574be8fd0e..914d5f7d0092d 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -602,6 +602,8 @@ struct DevirtModule {
ModuleSummaryIndex *const ExportSummary;
const ModuleSummaryIndex *const ImportSummary;
+ const bool InLTOMode;
+
IntegerType *const Int8Ty;
PointerType *const Int8PtrTy;
IntegerType *const Int32Ty;
@@ -638,11 +640,11 @@ struct DevirtModule {
DevirtModule(Module &M, ModuleAnalysisManager &MAM,
ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary)
+ const ModuleSummaryIndex *ImportSummary, bool InLTOMode = true)
: M(M), MAM(MAM),
FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
ExportSummary(ExportSummary), ImportSummary(ImportSummary),
- Int8Ty(Type::getInt8Ty(M.getContext())),
+ InLTOMode(InLTOMode), Int8Ty(Type::getInt8Ty(M.getContext())),
Int8PtrTy(PointerType::getUnqual(M.getContext())),
Int32Ty(Type::getInt32Ty(M.getContext())),
Int64Ty(Type::getInt64Ty(M.getContext())),
@@ -812,7 +814,7 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
- if (!DevirtModule(M, MAM, ExportSummary, ImportSummary).run())
+ if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, InLTOMode).run())
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
@@ -910,6 +912,8 @@ void llvm::updatePublicTypeTestCalls(Module &M,
CI->eraseFromParent();
}
} else {
+ // TODO: Don't replace public type tests when speculative devirtualization
+ // gets enabled in LTO mode.
auto *True = ConstantInt::getTrue(M.getContext());
for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {
auto *CI = cast<CallInst>(U.getUser());
@@ -1351,10 +1355,10 @@ bool DevirtModule::trySingleImplDevirt(
if (!IsExported)
return false;
- // Out of speculative devirtualization mode, if the only implementation has
- // local linkage, we must promote to external to make it visible to thin LTO
- // objects.
- if (!ClDevirtualizeSpeculatively && TheFn->hasLocalLinkage()) {
+ // If the only implementation has local linkage, we must promote
+ // to external to make it visible to thin LTO objects.
+ // This change should be safe only in LTO mode.
+ if (InLTOMode && TheFn->hasLocalLinkage()) {
std::string NewName = (TheFn->getName() + ".llvm.merged").str();
// Since we are renaming the function, any comdats with the same name must
@@ -2083,15 +2087,15 @@ void DevirtModule::scanTypeTestUsers(
Function *TypeTestFunc,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
// Find all virtual calls via a virtual table pointer %p under an assumption
- // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
- // points to a member of the type identifier %md. Group calls by (type ID,
- // offset) pair (effectively the identity of the virtual function) and store
- // to CallSlots.
+ // of the form llvm.assume(llvm.type.test(%p, %md)) or
+ // llvm.assume(llvm.public.type.test(%p, %md)).
+ // This indicates that %p points to a member of the type identifier %md.
+ // Group calls by (type ID, offset) pair (effectively the identity of the
+ // virtual function) and store to CallSlots.
for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
auto *CI = dyn_cast<CallInst>(U.getUser());
if (!CI)
continue;
-
// Search for virtual calls based on %p and add them to DevirtCalls.
SmallVector<DevirtCallSite, 1> DevirtCalls;
SmallVector<CallInst *, 1> Assumes;
@@ -2374,13 +2378,14 @@ bool DevirtModule::run() {
(ImportSummary && ImportSummary->partiallySplitLTOUnits()))
return false;
- Function *TypeTestFunc =
- Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
+ Function *PublicTypeTestFunc = nullptr;
// If we are in speculative devirtualization mode, we can work on the public
// type test intrinsics.
- if (!TypeTestFunc && ClDevirtualizeSpeculatively)
- TypeTestFunc =
+ if (ClDevirtualizeSpeculatively)
+ PublicTypeTestFunc =
Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
+ Function *TypeTestFunc =
+ Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
Function *TypeCheckedLoadFunc =
Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
@@ -2392,8 +2397,9 @@ bool DevirtModule::run() {
// module, this pass has nothing to do. But if we are exporting, we also need
// to handle any users that appear only in the function summaries.
if (!ExportSummary &&
- (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
- AssumeFunc->use_empty()) &&
+ (((!PublicTypeTestFunc || PublicTypeTestFunc->use_empty()) &&
+ (!TypeTestFunc || TypeTestFunc->use_empty())) ||
+ !AssumeFunc || AssumeFunc->use_empty()) &&
(!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
(!TypeCheckedLoadRelativeFunc ||
TypeCheckedLoadRelativeFunc->use_empty()))
@@ -2404,6 +2410,9 @@ bool DevirtModule::run() {
DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
buildTypeIdentifierMap(Bits, TypeIdMap);
+ if (PublicTypeTestFunc && AssumeFunc)
+ scanTypeTestUsers(PublicTypeTestFunc, TypeIdMap);
+
if (TypeTestFunc && AssumeFunc)
scanTypeTestUsers(TypeTestFunc, TypeIdMap);
diff --git a/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll
index b7d1bda8d133b..10566ae0dee8e 100644
--- a/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll
+++ b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll
@@ -32,7 +32,7 @@ define void @vf_empty(ptr %this) !dbg !11 {
; CHECK: define void @call
define void @call(ptr %obj) #1 !dbg !5 {
%vtable = load ptr, ptr %obj
- %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid")
+ %p = call i1 @llvm.public.type.test(ptr %vtable, metadata !"typeid")
call void @llvm.assume(i1 %p)
%fptr = load ptr, ptr %vtable
; CHECK: if.true.direct_targ:
@@ -96,6 +96,7 @@ define void @call3(ptr %obj) #1 !dbg !16 {
declare i1 @llvm.type.test(ptr, metadata)
+declare i1 @llvm.public.type.test(ptr, metadata)
declare void @llvm.assume(i1)
!llvm.dbg.cu = !{!0}
>From d558821cb162b69dfe7caa1bb503117eadbbe4ab Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Wed, 1 Oct 2025 23:31:59 +0000
Subject: [PATCH 3/3] Remove InLTOMode flag. Add a flag to differentiate
between ExportSummary that is built locally and the external one.
---
.../llvm/Transforms/IPO/WholeProgramDevirt.h | 16 +++++++---------
llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 17 +++++++++++------
2 files changed, 18 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
index b39ef684761a9..d500abb901b39 100644
--- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -226,17 +226,15 @@ struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> {
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
bool UseCommandLine = false;
- // Default value for LTO mode is true, as this is the default behavior for
- // whole program devirtualization unless explicitly disabled.
- const bool InLTOMode;
+ // True if ExportSummary was built locally from the module rather than
+ // provided externally to the pass (e.g., during LTO). Default value is false
+ // unless explicitly set when the Summary is explicitly built.
+ bool HasLocalSummary = false;
WholeProgramDevirtPass()
- : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true),
- InLTOMode(true) {}
+ : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {}
WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary,
- bool InLTOMode = true)
- : ExportSummary(ExportSummary), ImportSummary(ImportSummary),
- InLTOMode(InLTOMode) {
+ const ModuleSummaryIndex *ImportSummary)
+ : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
assert(!(ExportSummary && ImportSummary));
}
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 914d5f7d0092d..4a997cac1c214 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -601,8 +601,9 @@ struct DevirtModule {
ModuleSummaryIndex *const ExportSummary;
const ModuleSummaryIndex *const ImportSummary;
-
- const bool InLTOMode;
+ // True if ExportSummary was built locally from the module.
+ // Default is false unless explicitly set.
+ const bool HasLocalSummary;
IntegerType *const Int8Ty;
PointerType *const Int8PtrTy;
@@ -640,11 +641,13 @@ struct DevirtModule {
DevirtModule(Module &M, ModuleAnalysisManager &MAM,
ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary, bool InLTOMode = true)
+ const ModuleSummaryIndex *ImportSummary,
+ bool HasLocalSummary = false)
: M(M), MAM(MAM),
FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
ExportSummary(ExportSummary), ImportSummary(ImportSummary),
- InLTOMode(InLTOMode), Int8Ty(Type::getInt8Ty(M.getContext())),
+ HasLocalSummary(HasLocalSummary),
+ Int8Ty(Type::getInt8Ty(M.getContext())),
Int8PtrTy(PointerType::getUnqual(M.getContext())),
Int32Ty(Type::getInt32Ty(M.getContext())),
Int64Ty(Type::getInt64Ty(M.getContext())),
@@ -814,7 +817,8 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
- if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, InLTOMode).run())
+ if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, HasLocalSummary)
+ .run())
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
@@ -1358,7 +1362,7 @@ bool DevirtModule::trySingleImplDevirt(
// If the only implementation has local linkage, we must promote
// to external to make it visible to thin LTO objects.
// This change should be safe only in LTO mode.
- if (InLTOMode && TheFn->hasLocalLinkage()) {
+ if (!HasLocalSummary && TheFn->hasLocalLinkage()) {
std::string NewName = (TheFn->getName() + ".llvm.merged").str();
// Since we are renaming the function, any comdats with the same name must
@@ -2516,6 +2520,7 @@ bool DevirtModule::run() {
trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
// Out of speculative devirtualization mode, Try to apply virtual constant
// propagation or branch funneling.
+ // TODO: This should eventually be enabled for non-public type tests.
if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) {
DidVirtualConstProp |=
tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
More information about the llvm-commits
mailing list