[clang] [llvm] [Clang]: Enable speculative devirtualization (PR #159685)

Hassnaa Hamdi via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 24 00:46:53 PDT 2025


https://github.com/hassnaaHamdi updated https://github.com/llvm/llvm-project/pull/159685

>From 0b5d559f7d2ad8de2c12787904b5f09d5682006e Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Mon, 15 Sep 2025 23:41:28 +0000
Subject: [PATCH 1/3] [WPD]: Enable speculative devirtualizatoin.

- Add cl::opt 'devirtualize-speculatively' to enable it.
- Flag is disabled by default.
- It works regardless of the visibility of the object.
---
 .../lib/Transforms/IPO/WholeProgramDevirt.cpp |  64 +++++++--
 .../speculative-devirt-single-impl.ll         | 131 ++++++++++++++++++
 .../virtual-const-prop-check.ll               |   7 +
 3 files changed, 187 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll

diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 09bffa7bf5846..64f574be8fd0e 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -24,7 +24,8 @@
 //   returns 0, or a single vtable's function returns 1, replace each virtual
 //   call with a comparison of the vptr against that vtable's address.
 //
-// This pass is intended to be used during the regular and thin LTO pipelines:
+// This pass is intended to be used during the regular/thin and non-LTO
+// pipelines:
 //
 // During regular LTO, the pass determines the best optimization for each
 // virtual call and applies the resolutions directly to virtual calls that are
@@ -48,6 +49,14 @@
 //   is supported.
 // - Import phase: (same as with hybrid case above).
 //
+// During Speculative devirtualization mode -not restricted to LTO-:
+// - The pass applies speculative devirtualization without requiring any type of
+//   visibility.
+// - Skips other features like virtual constant propagation, uniform return
+//   value optimization, unique return value optimization and branch funnels as
+//   they need LTO.
+// - This mode is enabled via 'devirtualize-speculatively' flag.
+//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -61,7 +70,9 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TypeMetadataUtils.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
@@ -120,6 +131,13 @@ STATISTIC(NumVirtConstProp1Bit,
           "Number of 1 bit virtual constant propagations");
 STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
 
+// TODO: This option eventually should support any public visibility vtables
+// with/out LTO.
+static cl::opt<bool> ClDevirtualizeSpeculatively(
+    "devirtualize-speculatively",
+    cl::desc("Enable speculative devirtualization optimization"),
+    cl::init(false));
+
 static cl::opt<PassSummaryAction> ClSummaryAction(
     "wholeprogramdevirt-summary-action",
     cl::desc("What to do with the summary when running this pass"),
@@ -1083,10 +1101,10 @@ bool DevirtModule::tryFindVirtualCallTargets(
     if (!TM.Bits->GV->isConstant())
       return false;
 
-    // We cannot perform whole program devirtualization analysis on a vtable
-    // with public LTO visibility.
-    if (TM.Bits->GV->getVCallVisibility() ==
-        GlobalObject::VCallVisibilityPublic)
+    // Without ClDevirtualizeSpeculatively, we cannot perform whole program
+    // devirtualization analysis on a vtable with public LTO visibility.
+    if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() ==
+                                            GlobalObject::VCallVisibilityPublic)
       return false;
 
     Function *Fn = nullptr;
@@ -1105,6 +1123,12 @@ bool DevirtModule::tryFindVirtualCallTargets(
     if (Fn->getName() == "__cxa_pure_virtual")
       continue;
 
+    // In most cases empty functions will be overridden by the
+    // implementation of the derived class, so we can skip them.
+    if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() &&
+        Fn->getInstructionCount() <= 1)
+      continue;
+
     // We can disregard unreachable functions as possible call targets, as
     // unreachable functions shouldn't be called.
     if (mustBeUnreachableFunction(Fn, ExportSummary))
@@ -1223,10 +1247,12 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
         CallTrap->setDebugLoc(CB.getDebugLoc());
       }
 
-      // If fallback checking is enabled, add support to compare the virtual
-      // function pointer to the devirtualized target. In case of a mismatch,
-      // fall back to indirect call.
-      if (DevirtCheckMode == WPDCheckMode::Fallback) {
+      // If fallback checking or speculative devirtualization are enabled,
+      // add support to compare the virtual function pointer to the
+      // devirtualized target. In case of a mismatch, fall back to indirect
+      // call.
+      if (DevirtCheckMode == WPDCheckMode::Fallback ||
+          ClDevirtualizeSpeculatively) {
         MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
         // Version the indirect call site. If the called value is equal to the
         // given callee, 'NewInst' will be executed, otherwise the original call
@@ -1325,10 +1351,10 @@ bool DevirtModule::trySingleImplDevirt(
   if (!IsExported)
     return false;
 
-  // If the only implementation has local linkage, we must promote to external
-  // to make it visible to thin LTO objects. We can only get here during the
-  // ThinLTO export phase.
-  if (TheFn->hasLocalLinkage()) {
+  // Out of speculative devirtualization mode, if the only implementation has
+  // local linkage, we must promote to external to make it visible to thin LTO
+  // objects.
+  if (!ClDevirtualizeSpeculatively && TheFn->hasLocalLinkage()) {
     std::string NewName = (TheFn->getName() + ".llvm.merged").str();
 
     // Since we are renaming the function, any comdats with the same name must
@@ -2350,6 +2376,11 @@ bool DevirtModule::run() {
 
   Function *TypeTestFunc =
       Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
+  // If we are in speculative devirtualization mode, we can work on the public
+  // type test intrinsics.
+  if (!TypeTestFunc && ClDevirtualizeSpeculatively)
+    TypeTestFunc =
+        Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
   Function *TypeCheckedLoadFunc =
       Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_checked_load);
   Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
@@ -2472,8 +2503,11 @@ bool DevirtModule::run() {
                  .WPDRes[S.first.ByteOffset];
     if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
                                   S.first.ByteOffset, ExportSummary)) {
-
-      if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
+      bool SingleImplDevirt =
+          trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
+      // Out of speculative devirtualization mode, Try to apply virtual constant
+      // propagation or branch funneling.
+      if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) {
         DidVirtualConstProp |=
             tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
 
diff --git a/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll
new file mode 100644
index 0000000000000..b7d1bda8d133b
--- /dev/null
+++ b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll
@@ -0,0 +1,131 @@
+; -stats requires asserts
+; REQUIRES: asserts
+
+; Check that we can still devirtualize outside LTO mode when speculative devirtualization is enabled.
+; Check that we skip devirtualization for empty functions in speculative devirtualization mode
+
+; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively \
+; RUN: -pass-remarks=wholeprogramdevirt -stats %s 2>&1 | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: remark: devirt-single.cc:30:32: single-impl: devirtualized a call to vf
+; CHECK: remark: devirt-single.cc:41:32: single-impl: devirtualized a call to vf
+; CHECK: remark: devirt-single.cc:51:32: single-impl: devirtualized a call to vf
+; CHECK: remark: devirt-single.cc:13:0: devirtualized vf
+; CHECK-NOT: devirtualized
+
+ at vt1 = constant [1 x ptr] [ptr @vf], !type !8
+ at vt2 = constant [1 x ptr] [ptr @vf_empty], !type !12
+
+define i1 @vf(ptr %this) #0 !dbg !7 {
+  ret i1 true
+}
+
+; This should NOT be devirtualized because during non-lto empty functions
+; are skipped.
+define void @vf_empty(ptr %this) !dbg !11 {
+  ret void
+}
+
+; CHECK: define void @call
+define void @call(ptr %obj) #1 !dbg !5 {
+  %vtable = load ptr, ptr %obj
+  %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid")
+  call void @llvm.assume(i1 %p)
+  %fptr = load ptr, ptr %vtable
+  ; CHECK: if.true.direct_targ:
+  ; CHECK:   call i1 @vf(
+  ; CHECK: if.false.orig_indirect:
+  ; CHECK:   call i1 %fptr(
+  call i1 %fptr(ptr %obj), !dbg !6
+  ret void
+}
+
+
+; CHECK: define void @call1
+define void @call1(ptr %obj) #1 !dbg !9 {
+  %vtable = load ptr, ptr %obj
+  %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1")
+  call void @llvm.assume(i1 %p)
+  %fptr = load ptr, ptr %vtable, align 8
+  ; CHECK: call i1 %fptr
+  %1 = call i1 %fptr(ptr %obj), !dbg !10
+  ret void
+}
+declare ptr @llvm.load.relative.i32(ptr, i32)
+
+ at vt3 = private unnamed_addr constant [1 x i32] [
+  i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32)
+], align 4, !type !15
+
+; CHECK: define void @call2
+define void @call2(ptr %obj) #1 !dbg !13 {
+  %vtable = load ptr, ptr %obj
+  %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2")
+  call void @llvm.assume(i1 %p)
+  %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0)
+  ; CHECK: if.true.direct_targ:
+  ; CHECK:   call i1 @vf(
+  ; CHECK: if.false.orig_indirect:
+  ; CHECK:   call i1 %fptr(
+  call i1 %fptr(ptr %obj), !dbg !14
+  ret void
+}
+
+ at _ZTV1A.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [
+  i32 0,  ; offset to top
+  i32 0,  ; rtti
+  i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32)  ; vf_emptyunc offset
+] }, align 4, !type !18
+
+; CHECK: define void @call3
+define void @call3(ptr %obj) #1 !dbg !16 {
+  %vtable = load ptr, ptr %obj
+  %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3")
+  call void @llvm.assume(i1 %p)
+  %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 8)
+  ; CHECK: if.true.direct_targ:
+  ; CHECK:   call i1 @vf(
+  ; CHECK: if.false.orig_indirect:
+  ; CHECK:   call i1 %fptr(
+  call i1 %fptr(ptr %obj), !dbg !17
+  ret void
+}
+
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "devirt-single.cc", directory: ".")
+!2 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{!"clang version 4.0.0 (trunk 278098)"}
+!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!6 = !DILocation(line: 30, column: 32, scope: !5)
+!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEv", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!8 = !{i32 0, !"typeid"}
+
+!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: !1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!10 = !DILocation(line: 35, column: 32, scope: !9)
+!11 = distinct !DISubprogram(name: "vf_empty", linkageName: "_ZN3vt18vf_emptyEv", scope: !1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!12 = !{i32 0, !"typeid1"}
+
+!13 = distinct !DISubprogram(name: "call2", linkageName: "_Z5call2Pv", scope: !1, file: !1, line: 40, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!14 = !DILocation(line: 41, column: 32, scope: !13)
+!15 = !{i32 0, !"typeid2"}
+
+!16 = distinct !DISubprogram(name: "call3", linkageName: "_Z5call3Pv", scope: !1, file: !1, line: 50, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
+!17 = !DILocation(line: 51, column: 32, scope: !16)
+!18 = !{i32 0, !"typeid3"}
+
+
+
+; CHECK: 1 wholeprogramdevirt - Number of whole program devirtualization targets
+; CHECK: 3 wholeprogramdevirt - Number of single implementation devirtualizations
diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
index d8f5c912e9a50..8327e1cfdf1d2 100644
--- a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
+++ b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
@@ -11,6 +11,9 @@
 ; Check wildcard
 ; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt -wholeprogramdevirt-skip=vf?i1 %s 2>&1 | FileCheck %s --check-prefix=SKIP
 
+; Check that no stats are reported in speculative devirtualization mode as the virtual const prop is disabled.
+; RUN: opt -S -passes=wholeprogramdevirt -devirtualize-speculatively -stats %s 2>&1 | FileCheck %s --check-prefix=CHECK-SPECULATIVE-WPD
+
 target datalayout = "e-p:64:64"
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -225,3 +228,7 @@ declare ptr @llvm.load.relative.i32(ptr, i32)
 ; CHECK: 2 wholeprogramdevirt - Number of unique return value optimizations
 ; CHECK: 2 wholeprogramdevirt - Number of virtual constant propagations
 ; CHECK: 2 wholeprogramdevirt - Number of 1 bit virtual constant propagations
+
+; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of unique return value optimizations
+; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of virtual constant propagations
+; CHECK-SPECULATIVE-WPD-NOT: 0 wholeprogramdevirt - Number of 1 bit virtual constant propagations

>From e787aa2bb1274f511f37599f524e49ed41f8493c Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Tue, 23 Sep 2025 20:47:55 +0000
Subject: [PATCH 2/3] [WPD]: add devirt flag to conditionally enable
 speculative devirtualization        in the pass when it gets launched by pass
 manager.

---
 .../llvm/Transforms/IPO/WholeProgramDevirt.h  |  7 ++-
 .../lib/Transforms/IPO/WholeProgramDevirt.cpp | 48 ++++++++++++-------
 .../speculative-devirt-single-impl.ll         |  3 +-
 3 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
index 7a03405b4f462..2e33a4098be1b 100644
--- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -226,11 +226,14 @@ struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> {
   ModuleSummaryIndex *ExportSummary;
   const ModuleSummaryIndex *ImportSummary;
   bool UseCommandLine = false;
+  bool DevirtSpeculatively = false;
   WholeProgramDevirtPass()
       : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {}
   WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
-                         const ModuleSummaryIndex *ImportSummary)
-      : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
+                         const ModuleSummaryIndex *ImportSummary,
+                         bool DevirtSpeculatively = false)
+      : ExportSummary(ExportSummary), ImportSummary(ImportSummary),
+        DevirtSpeculatively(DevirtSpeculatively) {
     assert(!(ExportSummary && ImportSummary));
   }
   LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 64f574be8fd0e..79fd0ebb942f8 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -636,9 +636,11 @@ struct DevirtModule {
   std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
   PatternList FunctionsToSkip;
 
+  const bool DevirtSpeculatively;
   DevirtModule(Module &M, ModuleAnalysisManager &MAM,
                ModuleSummaryIndex *ExportSummary,
-               const ModuleSummaryIndex *ImportSummary)
+               const ModuleSummaryIndex *ImportSummary,
+               bool DevirtSpeculatively)
       : M(M), MAM(MAM),
         FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
         ExportSummary(ExportSummary), ImportSummary(ImportSummary),
@@ -651,7 +653,8 @@ struct DevirtModule {
         RemarksEnabled(areRemarksEnabled()),
         OREGetter([&](Function &F) -> OptimizationRemarkEmitter & {
           return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-        }) {
+        }),
+        DevirtSpeculatively(DevirtSpeculatively) {
     assert(!(ExportSummary && ImportSummary));
     FunctionsToSkip.init(SkipFunctionNames);
   }
@@ -765,7 +768,8 @@ struct DevirtModule {
 
   // Lower the module using the action and summary passed as command line
   // arguments. For testing purposes only.
-  static bool runForTesting(Module &M, ModuleAnalysisManager &MAM);
+  static bool runForTesting(Module &M, ModuleAnalysisManager &MAM,
+                            bool DevirtSpeculatively);
 };
 
 struct DevirtIndex {
@@ -808,11 +812,22 @@ struct DevirtIndex {
 PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
                                               ModuleAnalysisManager &MAM) {
   if (UseCommandLine) {
-    if (!DevirtModule::runForTesting(M, MAM))
+    if (!DevirtModule::runForTesting(M, MAM, ClDevirtualizeSpeculatively))
       return PreservedAnalyses::all();
     return PreservedAnalyses::none();
   }
-  if (!DevirtModule(M, MAM, ExportSummary, ImportSummary).run())
+
+  std::optional<ModuleSummaryIndex> Index;
+  if (!ExportSummary && !ImportSummary && DevirtSpeculatively) {
+    // Build the ExportSummary from the module.
+    assert(!ExportSummary &&
+           "ExportSummary is expected to be empty in non-LTO mode");
+    ProfileSummaryInfo PSI(M);
+    Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI));
+    ExportSummary = Index.has_value() ? &Index.value() : nullptr;
+  }
+  if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, DevirtSpeculatively)
+           .run())
     return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
@@ -1008,7 +1023,8 @@ static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary) {
   return ErrorSuccess();
 }
 
-bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM) {
+bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM,
+                                 bool DevirtSpeculatively) {
   std::unique_ptr<ModuleSummaryIndex> Summary =
       std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
 
@@ -1037,7 +1053,8 @@ bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM) {
                    ClSummaryAction == PassSummaryAction::Export ? Summary.get()
                                                                 : nullptr,
                    ClSummaryAction == PassSummaryAction::Import ? Summary.get()
-                                                                : nullptr)
+                                                                : nullptr,
+                   DevirtSpeculatively)
           .run();
 
   if (!ClWriteSummary.empty()) {
@@ -1101,10 +1118,10 @@ bool DevirtModule::tryFindVirtualCallTargets(
     if (!TM.Bits->GV->isConstant())
       return false;
 
-    // Without ClDevirtualizeSpeculatively, we cannot perform whole program
+    // Without DevirtSpeculatively, we cannot perform whole program
     // devirtualization analysis on a vtable with public LTO visibility.
-    if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() ==
-                                            GlobalObject::VCallVisibilityPublic)
+    if (!DevirtSpeculatively && TM.Bits->GV->getVCallVisibility() ==
+                                    GlobalObject::VCallVisibilityPublic)
       return false;
 
     Function *Fn = nullptr;
@@ -1125,7 +1142,7 @@ bool DevirtModule::tryFindVirtualCallTargets(
 
     // In most cases empty functions will be overridden by the
     // implementation of the derived class, so we can skip them.
-    if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() &&
+    if (DevirtSpeculatively && Fn->getReturnType()->isVoidTy() &&
         Fn->getInstructionCount() <= 1)
       continue;
 
@@ -1251,8 +1268,7 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
       // add support to compare the virtual function pointer to the
       // devirtualized target. In case of a mismatch, fall back to indirect
       // call.
-      if (DevirtCheckMode == WPDCheckMode::Fallback ||
-          ClDevirtualizeSpeculatively) {
+      if (DevirtCheckMode == WPDCheckMode::Fallback || DevirtSpeculatively) {
         MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();
         // Version the indirect call site. If the called value is equal to the
         // given callee, 'NewInst' will be executed, otherwise the original call
@@ -1354,7 +1370,7 @@ bool DevirtModule::trySingleImplDevirt(
   // Out of speculative devirtualization mode, if the only implementation has
   // local linkage, we must promote to external to make it visible to thin LTO
   // objects.
-  if (!ClDevirtualizeSpeculatively && TheFn->hasLocalLinkage()) {
+  if (!DevirtSpeculatively && TheFn->hasLocalLinkage()) {
     std::string NewName = (TheFn->getName() + ".llvm.merged").str();
 
     // Since we are renaming the function, any comdats with the same name must
@@ -2378,7 +2394,7 @@ bool DevirtModule::run() {
       Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test);
   // If we are in speculative devirtualization mode, we can work on the public
   // type test intrinsics.
-  if (!TypeTestFunc && ClDevirtualizeSpeculatively)
+  if (!TypeTestFunc && DevirtSpeculatively)
     TypeTestFunc =
         Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test);
   Function *TypeCheckedLoadFunc =
@@ -2507,7 +2523,7 @@ bool DevirtModule::run() {
           trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res);
       // Out of speculative devirtualization mode, Try to apply virtual constant
       // propagation or branch funneling.
-      if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) {
+      if (!SingleImplDevirt && !DevirtSpeculatively) {
         DidVirtualConstProp |=
             tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);
 
diff --git a/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll
index b7d1bda8d133b..9ac09f1517e14 100644
--- a/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll
+++ b/llvm/test/Transforms/WholeProgramDevirt/speculative-devirt-single-impl.ll
@@ -23,8 +23,7 @@ define i1 @vf(ptr %this) #0 !dbg !7 {
   ret i1 true
 }
 
-; This should NOT be devirtualized because during non-lto empty functions
-; are skipped.
+; This should NOT be devirtualized because empty functions are skipped during non-lto.
 define void @vf_empty(ptr %this) !dbg !11 {
   ret void
 }

>From 415b4315373317b5668890638d2616b2787a744d Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Wed, 24 Sep 2025 07:44:55 +0000
Subject: [PATCH 3/3] [Clang]: Enable speculative devirtualization

---
 clang/docs/UsersManual.rst                   |  9 +++++++++
 clang/include/clang/Basic/CodeGenOptions.def |  2 ++
 clang/include/clang/Driver/Options.td        | 12 +++++++++---
 clang/lib/CodeGen/BackendUtil.cpp            |  1 +
 clang/lib/CodeGen/CGClass.cpp                | 14 ++++++++------
 clang/lib/CodeGen/CGVTables.cpp              |  6 ++++--
 clang/lib/CodeGen/ItaniumCXXABI.cpp          | 13 ++++++++-----
 clang/lib/Driver/ToolChains/Clang.cpp        |  7 +++++++
 clang/test/CodeGenCXX/type-metadata.cpp      |  8 ++++++++
 clang/test/Driver/clang_f_opts.c             |  2 --
 llvm/include/llvm/Passes/PassBuilder.h       |  4 ++++
 llvm/lib/Passes/PassBuilderPipelines.cpp     | 19 +++++++++++++++++++
 12 files changed, 79 insertions(+), 18 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index a8bbf146431ea..241f374bcf4c3 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2313,6 +2313,13 @@ are listed below.
    This enables better devirtualization. Turned off by default, because it is
    still experimental.
 
+.. option:: -fdevirtualize-speculatively
+
+   Enable speculative devirtualization optimization, such as single-implementation
+   devirtualization. This optimization is used out of LTO mode for now.
+   Turned off by default.
+   TODO: Enable for LTO mode.
+
 .. option:: -fwhole-program-vtables
 
    Enable whole-program vtable optimizations, such as single-implementation
@@ -5161,6 +5168,8 @@ Execute ``clang-cl /?`` to see a list of supported options:
       -fstandalone-debug      Emit full debug info for all types used by the program
       -fstrict-aliasing	      Enable optimizations based on strict aliasing rules
       -fsyntax-only           Run the preprocessor, parser and semantic analysis stages
+      -fdevirtualize-speculatively
+                              Enables speculative devirtualization optimization.
       -fwhole-program-vtables Enables whole-program vtable optimization. Requires -flto
       -gcodeview-ghash        Emit type record hashes in a .debug$H section
       -gcodeview              Generate CodeView debug information
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 872f73ebf3810..38174cf13cadf 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -358,6 +358,8 @@ VALUE_CODEGENOPT(WarnStackSize     , 32, UINT_MAX, Benign) ///< Set via -fwarn-s
 CODEGENOPT(NoStackArgProbe, 1, 0, Benign) ///< Set when -mno-stack-arg-probe is used
 CODEGENOPT(EmitLLVMUseLists, 1, 0, Benign) ///< Control whether to serialize use-lists.
 
+CODEGENOPT(DevirtualizeSpeculatively, 1, 0, Benign) ///< Whether to apply the speculative
+                                                    /// devirtualization optimization.
 CODEGENOPT(WholeProgramVTables, 1, 0, Benign) ///< Whether to apply whole-program
                                               ///  vtable optimization.
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 47d328f862e07..a3656b80cac32 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4432,6 +4432,13 @@ defm new_infallible : BoolFOption<"new-infallible",
   BothFlags<[], [ClangOption, CC1Option],
           " treating throwing global C++ operator new as always returning valid memory "
   "(annotates with __attribute__((returns_nonnull)) and throw()). This is detectable in source.">>;
+defm devirtualize_speculatively
+    : BoolFOption<"devirtualize-speculatively",
+                  CodeGenOpts<"DevirtualizeSpeculatively">, DefaultFalse,
+                  PosFlag<SetTrue, [], [],
+                          "Enables speculative devirtualization optimization.">,
+                  NegFlag<SetFalse>,
+                  BothFlags<[], [ClangOption, CLOption, CC1Option]>>;
 defm whole_program_vtables : BoolFOption<"whole-program-vtables",
   CodeGenOpts<"WholeProgramVTables">, DefaultFalse,
   PosFlag<SetTrue, [], [ClangOption, CC1Option],
@@ -7012,9 +7019,8 @@ defm variable_expansion_in_unroller : BooleanFFlag<"variable-expansion-in-unroll
     Group<clang_ignored_gcc_optimization_f_Group>;
 defm web : BooleanFFlag<"web">, Group<clang_ignored_gcc_optimization_f_Group>;
 defm whole_program : BooleanFFlag<"whole-program">, Group<clang_ignored_gcc_optimization_f_Group>;
-defm devirtualize : BooleanFFlag<"devirtualize">, Group<clang_ignored_gcc_optimization_f_Group>;
-defm devirtualize_speculatively : BooleanFFlag<"devirtualize-speculatively">,
-    Group<clang_ignored_gcc_optimization_f_Group>;
+defm devirtualize : BooleanFFlag<"devirtualize">,
+                    Group<clang_ignored_gcc_optimization_f_Group>;
 
 // Generic gfortran options.
 def A_DASH : Joined<["-"], "A-">, Group<gfortran_Group>;
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 8c99af2bdff83..790467dc557a5 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -907,6 +907,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
   // non-integrated assemblers don't recognize .cgprofile section.
   PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
   PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO;
+  PTO.DevirtualizeSpeculatively = CodeGenOpts.DevirtualizeSpeculatively;
 
   LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index 8346ee3aa6a8d..bf1724e347a7f 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -2771,10 +2771,11 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD,
                                                    SourceLocation Loc) {
   if (SanOpts.has(SanitizerKind::CFIVCall))
     EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc);
-  else if (CGM.getCodeGenOpts().WholeProgramVTables &&
-           // Don't insert type test assumes if we are forcing public
-           // visibility.
-           !CGM.AlwaysHasLTOVisibilityPublic(RD)) {
+  else if ((CGM.getCodeGenOpts().WholeProgramVTables &&
+            // Don't insert type test assumes if we are forcing public
+            // visibility.
+            !CGM.AlwaysHasLTOVisibilityPublic(RD)) ||
+           CGM.getCodeGenOpts().DevirtualizeSpeculatively) {
     CanQualType Ty = CGM.getContext().getCanonicalTagType(RD);
     llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(Ty);
     llvm::Value *TypeId =
@@ -2932,8 +2933,9 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD,
 }
 
 bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) {
-  if (!CGM.getCodeGenOpts().WholeProgramVTables ||
-      !CGM.HasHiddenLTOVisibility(RD))
+  if ((!CGM.getCodeGenOpts().WholeProgramVTables ||
+       !CGM.HasHiddenLTOVisibility(RD)) &&
+      !CGM.getCodeGenOpts().DevirtualizeSpeculatively)
     return false;
 
   if (CGM.getCodeGenOpts().VirtualFunctionElimination)
diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index e14e883a55ac5..959ba2031acf4 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -1358,10 +1358,12 @@ llvm::GlobalObject::VCallVisibility CodeGenModule::GetVCallVisibilityLevel(
 void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
                                            llvm::GlobalVariable *VTable,
                                            const VTableLayout &VTLayout) {
-  // Emit type metadata on vtables with LTO or IR instrumentation.
+  // Emit type metadata on vtables with LTO or IR instrumentation or
+  // speculative devirtualization.
   // In IR instrumentation, the type metadata is used to find out vtable
   // definitions (for type profiling) among all global variables.
-  if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr())
+  if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr() &&
+      !getCodeGenOpts().DevirtualizeSpeculatively)
     return;
 
   CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType());
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 7dc2eaf1e9f75..71d36b5d6b153 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -717,9 +717,10 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
   bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination &&
                            CGM.HasHiddenLTOVisibility(RD);
   bool ShouldEmitWPDInfo =
-      CGM.getCodeGenOpts().WholeProgramVTables &&
-      // Don't insert type tests if we are forcing public visibility.
-      !CGM.AlwaysHasLTOVisibilityPublic(RD);
+      (CGM.getCodeGenOpts().WholeProgramVTables &&
+       // Don't insert type tests if we are forcing public visibility.
+       !CGM.AlwaysHasLTOVisibilityPublic(RD)) ||
+      CGM.getCodeGenOpts().DevirtualizeSpeculatively;
   llvm::Value *VirtualFn = nullptr;
 
   {
@@ -2114,13 +2115,15 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
   // definitions to ensure we associate derived classes with base classes
   // defined in headers but with a strong definition only in a shared library.
   if (!VTable->isDeclarationForLinker() ||
-      CGM.getCodeGenOpts().WholeProgramVTables) {
+      CGM.getCodeGenOpts().WholeProgramVTables ||
+      CGM.getCodeGenOpts().DevirtualizeSpeculatively) {
     CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout);
     // For available_externally definitions, add the vtable to
     // @llvm.compiler.used so that it isn't deleted before whole program
     // analysis.
     if (VTable->isDeclarationForLinker()) {
-      assert(CGM.getCodeGenOpts().WholeProgramVTables);
+      assert(CGM.getCodeGenOpts().WholeProgramVTables ||
+             CGM.getCodeGenOpts().DevirtualizeSpeculatively);
       CGM.addCompilerUsedGlobal(VTable);
     }
   }
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 63efb0f02baa8..d91afcbf3fd6c 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7785,6 +7785,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   addOpenMPHostOffloadingArgs(C, JA, Args, CmdArgs);
 
+  // Temporarily disable this for LTO if it's not explicitly enabled.
+  // TODO: enable it by default for LTO also.
+  if (Args.hasFlag(options::OPT_fdevirtualize_speculatively,
+                   options::OPT_fno_devirtualize_speculatively,
+                   /*Default value*/ false))
+    CmdArgs.push_back("-fdevirtualize-speculatively");
+
   bool VirtualFunctionElimination =
       Args.hasFlag(options::OPT_fvirtual_function_elimination,
                    options::OPT_fno_virtual_function_elimination, false);
diff --git a/clang/test/CodeGenCXX/type-metadata.cpp b/clang/test/CodeGenCXX/type-metadata.cpp
index 1cb2fed8db3e6..61d36204942dc 100644
--- a/clang/test/CodeGenCXX/type-metadata.cpp
+++ b/clang/test/CodeGenCXX/type-metadata.cpp
@@ -14,6 +14,9 @@
 // RUN: %clang_cc1 -O2 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=ITANIUM-OPT --check-prefix=ITANIUM-OPT-LAYOUT %s
 // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TT-MS %s
 
+// Test for the speculative devirtualization feature in nonlto mode:
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT  %s
+
 // Tests for cfi + whole-program-vtables:
 // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility=hidden -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=ITANIUM-HIDDEN --check-prefix=ITANIUM-COMMON-MD --check-prefix=TC-ITANIUM --check-prefix=ITANIUM-NO-RV-MD %s
 // RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=CFI --check-prefix=CFI-VT --check-prefix=MS --check-prefix=MS-TYPEMETADATA --check-prefix=TC-MS %s
@@ -178,6 +181,7 @@ void af(A *a) {
   // TT-ITANIUM-HIDDEN: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A")
   // TT-ITANIUM-DEFAULT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A")
   // TT-MS: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"?AUA@@")
+  // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A")
   // TC-ITANIUM:    [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A")
   // TC-ITANIUM-RV: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata !"_ZTS1A")
   // TC-MS: [[PAIR:%[^ ]*]] = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@")
@@ -212,6 +216,7 @@ void df1(D *d) {
   // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]])
   // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]])
   // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA@@")
+  // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]])
   // TC-ITANIUM:    {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]])
   // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 0, metadata ![[DTYPE:[0-9]+]])
   // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA@@")
@@ -224,6 +229,7 @@ void dg1(D *d) {
   // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B")
   // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B")
   // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUB@@")
+  // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTS1B")
   // TC-ITANIUM:    {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTS1B")
   // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTS1B")
   // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUB@@")
@@ -236,6 +242,7 @@ void dh1(D *d) {
   // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]])
   // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]])
   // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE:[0-9]+]])
+  // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata ![[DTYPE]])
   // TC-ITANIUM:    {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 16, metadata ![[DTYPE]])
   // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE]])
   // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata ![[DTYPE:[0-9]+]])
@@ -297,6 +304,7 @@ void f(D *d) {
   // TT-ITANIUM-HIDDEN: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE")
   // TT-ITANIUM-DEFAULT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE")
   // TT-MS: {{%[^ ]*}} = call i1 @llvm.type.test(ptr {{%[^ ]*}}, metadata !"?AUA at test2@@")
+  // TT-ITANIUM-DEFAULT-NOLTO-SPECULATIVE-DEVIRT: {{%[^ ]*}} = call i1 @llvm.public.type.test(ptr {{%[^ ]*}}, metadata !"_ZTSN5test21DE")
   // TC-ITANIUM:    {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 8, metadata !"_ZTSN5test21DE")
   // TC-ITANIUM-RV: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load.relative(ptr {{%[^ ]*}}, i32 4, metadata !"_ZTSN5test21DE")
   // TC-MS: {{%[^ ]*}} = call { ptr, i1 } @llvm.type.checked.load(ptr {{%[^ ]*}}, i32 0, metadata !"?AUA at test2@@")
diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c
index eb3994ddabcd3..1e6e1253a7a09 100644
--- a/clang/test/Driver/clang_f_opts.c
+++ b/clang/test/Driver/clang_f_opts.c
@@ -371,7 +371,6 @@
 // RUN: -ftree-ter                                                            \
 // RUN: -ftree-vrp                                                            \
 // RUN: -fno-devirtualize                                                     \
-// RUN: -fno-devirtualize-speculatively                                       \
 // RUN: -fslp-vectorize-aggressive                                            \
 // RUN: -fno-slp-vectorize-aggressive                                         \
 // RUN: %s 2>&1 | FileCheck --check-prefix=CHECK-WARNING %s
@@ -430,7 +429,6 @@
 // CHECK-WARNING-DAG: optimization flag '-ftree-ter' is not supported
 // CHECK-WARNING-DAG: optimization flag '-ftree-vrp' is not supported
 // CHECK-WARNING-DAG: optimization flag '-fno-devirtualize' is not supported
-// CHECK-WARNING-DAG: optimization flag '-fno-devirtualize-speculatively' is not supported
 // CHECK-WARNING-DAG: the flag '-fslp-vectorize-aggressive' has been deprecated and will be ignored
 // CHECK-WARNING-DAG: the flag '-fno-slp-vectorize-aggressive' has been deprecated and will be ignored
 
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 2742ec1b71b7e..c1fcde24cc420 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -102,6 +102,10 @@ class PipelineTuningOptions {
   // analyses after various module->function or cgscc->function adaptors in the
   // default pipelines.
   bool EagerlyInvalidateAnalyses;
+
+  // Tuning option to enable/disable speculative devirtualization.
+  // Its default value is false.
+  bool DevirtualizeSpeculatively;
 };
 
 /// This class provides access to building LLVM's passes.
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 79642e650ac83..bbaa7493445a3 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -322,6 +322,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
   MergeFunctions = EnableMergeFunctions;
   InlinerThreshold = -1;
   EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
+  DevirtualizeSpeculatively = false;
 }
 
 namespace llvm {
@@ -1635,6 +1636,24 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   if (!LTOPreLink)
     MPM.addPass(RelLookupTableConverterPass());
 
+  if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
+    MPM.addPass(WholeProgramDevirtPass(
+        /*ExportSummary*/ nullptr,
+        /*ImportSummary*/ nullptr,
+        /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
+    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
+                                   lowertypetests::DropTestKind::Assume));
+    if (EnableModuleInliner) {
+      MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
+                                    UseInlineAdvisor,
+                                    ThinOrFullLTOPhase::None));
+    } else {
+      MPM.addPass(ModuleInlinerWrapperPass(
+          getInlineParamsFromOptLevel(Level),
+          /* MandatoryFirst */ true,
+          InlineContext{ThinOrFullLTOPhase::None, InlinePass::CGSCCInliner}));
+    }
+  }
   return MPM;
 }
 



More information about the llvm-commits mailing list