[llvm] [TypeProf][IndirectCallProm]Tweak max number of vtables by considering vtable address point materialization cost (PR #105620)
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 21 23:39:51 PDT 2024
https://github.com/minglotus-6 created https://github.com/llvm/llvm-project/pull/105620
The ICP pass compares the vtable pointer with the vtable address point for a more efficient comparison sequence. The baseline is to compare the function pointer (loaded from the vtable) with the function address. The current cost-benefit analysis allows at most one vtable for each function candidate (to not insert instructions on hot paths).
* https://github.com/llvm/llvm-project/blob/main/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll has a side-by-side (SxS) comparison
This patch tweaks the heuristic by allowing an additional vtable (for the last function candidate) if the cost of materializing a vtable is low.
>From c9e880bc0f065d55e5d5f86e814fa29b252c8a2a Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Fri, 16 Aug 2024 15:00:11 -0700
Subject: [PATCH 1/3] resolve comments and add unit tests
---
.../llvm/Analysis/TargetTransformInfo.h | 10 +++
.../llvm/Analysis/TargetTransformInfoImpl.h | 2 +
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 4 ++
llvm/include/llvm/CodeGen/TargetLowering.h | 6 +-
llvm/lib/Analysis/TargetTransformInfo.cpp | 4 ++
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +-
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
.../CodeGen/SelectionDAG/TargetLowering.cpp | 4 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 3 +-
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 11 ++--
llvm/lib/Target/AMDGPU/SIISelLowering.h | 2 +-
llvm/lib/Target/ARM/ARMISelLowering.cpp | 3 +-
llvm/lib/Target/ARM/ARMISelLowering.h | 2 +-
llvm/unittests/Target/AArch64/CMakeLists.txt | 1 +
.../Target/AArch64/TargetTransformInfo.cpp | 60 +++++++++++++++++
llvm/unittests/Target/X86/CMakeLists.txt | 1 +
.../Target/X86/TargetTransformInfo.cpp | 64 +++++++++++++++++++
18 files changed, 167 insertions(+), 21 deletions(-)
create mode 100644 llvm/unittests/Target/AArch64/TargetTransformInfo.cpp
create mode 100644 llvm/unittests/Target/X86/TargetTransformInfo.cpp
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 38e8b9da213974..4956fc2c4303b0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -665,6 +665,12 @@ class TargetTransformInfo {
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) const;
+ /// Return true if folding a constant offset with the given GlobalValue
+ /// (representing a GlobalAddress) is legal. It is frequently not legal
+ /// in PIC relocation models.
+ /// Caller must guarantee that GlobalValue represents a global address.
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const;
+
/// Targets can implement their own combinations for target-specific
/// intrinsics. This function will be called from the InstCombine pass every
/// time a target-specific intrinsic is encountered.
@@ -1880,6 +1886,7 @@ class TargetTransformInfo::Concept {
APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) = 0;
+ virtual bool isOffsetFoldingLegal(const GlobalValue *GV) const = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalAddScalableImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
@@ -2348,6 +2355,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
SimplifyAndSetOp);
}
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const override {
+ return Impl.isOffsetFoldingLegal(GV);
+ }
bool isLegalAddImmediate(int64_t Imm) override {
return Impl.isLegalAddImmediate(Imm);
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index d208a710bb27fd..bb81f4defcd633 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -220,6 +220,8 @@ class TargetTransformInfoImplBase {
void getPeelingPreferences(Loop *, ScalarEvolution &,
TTI::PeelingPreferences &) const {}
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const { return false; }
+
bool isLegalAddImmediate(int64_t Imm) const { return false; }
bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 890c2b8ca36e11..8059191c6dd471 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -354,6 +354,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return getTLI()->getPreferredLargeGEPBaseOffset(MinOffset, MaxOffset);
}
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const {
+ return getTLI()->isOffsetFoldingLegal(GV);
+ }
+
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
Type *ScalarValTy) const {
auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 9ccdbab008aec8..72fd58c0f87536 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2811,6 +2811,10 @@ class TargetLoweringBase {
Type *Ty, unsigned AddrSpace,
Instruction *I = nullptr) const;
+ virtual bool isOffsetFoldingLegal(const GlobalValue *GV) const {
+ return false;
+ }
+
/// Returns true if the targets addressing mode can target thread local
/// storage (TLS).
virtual bool addressingModeSupportsTLS(const GlobalValue &) const {
@@ -3862,7 +3866,7 @@ class TargetLowering : public TargetLoweringBase {
/// Return true if folding a constant offset with the given GlobalAddress is
/// legal. It is frequently not legal in PIC relocation models.
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const override;
/// On x86, return true if the operand with index OpNo is a CALL or JUMP
/// instruction, which can use either a memory constraint or an address
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index dcde78925bfa98..5e80c40c41a286 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -396,6 +396,10 @@ void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
return TTIImpl->getPeelingPreferences(L, SE, PP);
}
+bool TargetTransformInfo::isOffsetFoldingLegal(const GlobalValue *GV) const {
+ return TTIImpl->isOffsetFoldingLegal(GV);
+}
+
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
return TTIImpl->isLegalAddImmediate(Imm);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f827eb559a01cf..9ad75fd328ceea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1161,7 +1161,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
}
} else {
if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
- if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
+ if (GA->getOpcode() == ISD::GlobalAddress &&
+ TLI.isOffsetFoldingLegal(GA->getGlobal()))
return false;
for (SDNode *Node : N->uses()) {
@@ -4006,7 +4007,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
- if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA->getGlobal())) {
// fold (sub Sym+c1, Sym+c2) -> c1-c2
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c3a7df5361cd45..9a8b6ae7da4053 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6339,7 +6339,7 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
const SDNode *N2) {
if (GA->getOpcode() != ISD::GlobalAddress)
return SDValue();
- if (!TLI->isOffsetFoldingLegal(GA))
+ if (!TLI->isOffsetFoldingLegal(GA->getGlobal()))
return SDValue();
auto *C2 = dyn_cast<ConstantSDNode>(N2);
if (!C2)
@@ -13112,7 +13112,7 @@ SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) const {
// constant integer.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N))
if (GA->getOpcode() == ISD::GlobalAddress &&
- TLI->isOffsetFoldingLegal(GA))
+ TLI->isOffsetFoldingLegal(GA->getGlobal()))
return GA;
if ((N.getOpcode() == ISD::SPLAT_VECTOR) &&
isa<ConstantSDNode>(N.getOperand(0)))
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index bef70dcb71f567..df63617c2d6883 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -485,10 +485,8 @@ SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}
-bool
-TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+bool TargetLowering::isOffsetFoldingLegal(const GlobalValue *GV) const {
const TargetMachine &TM = getTargetMachine();
- const GlobalValue *GV = GA->getGlobal();
// If the address is not even local to this DSO we will have to load it from
// a got and then add the offset.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f0c3afc4f9b5d5..7e7bd9a924d9b6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11282,8 +11282,7 @@ SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op,
return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
}
-bool AArch64TargetLowering::isOffsetFoldingLegal(
- const GlobalAddressSDNode *GA) const {
+bool AArch64TargetLowering::isOffsetFoldingLegal(const GlobalValue *GV) const {
// Offsets are folded in the DAG combine rather than here so that we can
// intelligently choose an offset based on the uses.
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 81e15185f985d5..eb02484bcbe95b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -622,7 +622,7 @@ class AArch64TargetLowering : public TargetLowering {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const override;
- bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f8767e00949bf0..da4c5c998b5dbd 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7551,8 +7551,7 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, SL, VT, Or);
}
-bool
-SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+bool SITargetLowering::isOffsetFoldingLegal(const GlobalValue *GV) const {
// OSes that use ELF REL relocations (instead of RELA) can only store a
// 32-bit addend in the instruction, so it is not safe to allow offset folding
// which can create arbitrary 64-bit addends. (This is only a problem for
@@ -7565,10 +7564,10 @@ SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
// We can fold offsets for anything that doesn't require a GOT relocation.
- return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
- GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
- !shouldEmitGOTReloc(GA->getGlobal());
+ return (GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
+ !shouldEmitGOTReloc(GV);
}
static SDValue
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1f198a92c0fa6a..e9137614ecdf20 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -364,7 +364,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;
- bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const override;
unsigned combineRepeatedFPDivisors() const override {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 75d16a42d0205a..ed8a9ffd65dc84 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -20974,8 +20974,7 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
}
-bool
-ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalValue *GV) const {
// The ARM target isn't yet aware of offsets.
return false;
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index a255e9b6fc365f..758b09786f05d2 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -595,7 +595,7 @@ class VectorType;
bool
isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
- bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const override;
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
diff --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt
index f53668373efee9..726fa46ba97eb0 100644
--- a/llvm/unittests/Target/AArch64/CMakeLists.txt
+++ b/llvm/unittests/Target/AArch64/CMakeLists.txt
@@ -30,4 +30,5 @@ add_llvm_target_unittest(AArch64Tests
SMEAttributesTest.cpp
AArch64SVESchedPseudoTest.cpp
Immediates.cpp
+ TargetTransformInfo.cpp
)
diff --git a/llvm/unittests/Target/AArch64/TargetTransformInfo.cpp b/llvm/unittests/Target/AArch64/TargetTransformInfo.cpp
new file mode 100644
index 00000000000000..b87675b9230650
--- /dev/null
+++ b/llvm/unittests/Target/AArch64/TargetTransformInfo.cpp
@@ -0,0 +1,60 @@
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include "gtest/gtest.h"
+#include <initializer_list>
+#include <memory>
+
+using namespace llvm;
+
+namespace {
+
+static std::unique_ptr<Module> parseIR(LLVMContext &C, const char *IR) {
+ SMDiagnostic Err;
+ std::unique_ptr<Module> Mod = parseAssemblyString(IR, Err, C);
+ if (!Mod)
+ Err.print(__FILE__, errs());
+ return Mod;
+}
+
+TEST(TargetTransformInfo, isOffsetFoldingLegal) {
+ LLVMInitializeAArch64TargetInfo();
+ LLVMInitializeAArch64Target();
+ LLVMInitializeAArch64TargetMC();
+
+ LLVMContext Ctx;
+ std::unique_ptr<Module> M = parseIR(Ctx, R"(
+ target triple = "aarch64-unknown-linux-gnu"
+
+ @Base1 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }
+ @Base2 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }
+
+ define void @Base1_bar(ptr %this) {
+ ret void
+ }
+
+ declare i32 @Base1_foo(ptr)
+ )");
+
+ std::string Error;
+ const Target *T = TargetRegistry::lookupTarget(M->getTargetTriple(), Error);
+ std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
+ M->getTargetTriple(), "generic", "", TargetOptions(), std::nullopt,
+ std::nullopt, CodeGenOptLevel::Default));
+
+ ASSERT_FALSE(TM->isPositionIndependent());
+
+ TargetTransformInfo TTI =
+ TM->getTargetTransformInfo(*M->getFunction("Base1_bar"));
+
+ EXPECT_FALSE(TTI.isOffsetFoldingLegal(M->getNamedValue("Base1")));
+ EXPECT_FALSE(TTI.isOffsetFoldingLegal(M->getNamedValue("Base2")));
+}
+} // namespace
diff --git a/llvm/unittests/Target/X86/CMakeLists.txt b/llvm/unittests/Target/X86/CMakeLists.txt
index b011681aa3b95a..236ab8aebf4f6a 100644
--- a/llvm/unittests/Target/X86/CMakeLists.txt
+++ b/llvm/unittests/Target/X86/CMakeLists.txt
@@ -23,5 +23,6 @@ set(LLVM_LINK_COMPONENTS
add_llvm_unittest(X86Tests
MachineSizeOptsTest.cpp
+ TargetTransformInfo.cpp
TernlogTest.cpp
)
diff --git a/llvm/unittests/Target/X86/TargetTransformInfo.cpp b/llvm/unittests/Target/X86/TargetTransformInfo.cpp
new file mode 100644
index 00000000000000..f38062c8691277
--- /dev/null
+++ b/llvm/unittests/Target/X86/TargetTransformInfo.cpp
@@ -0,0 +1,64 @@
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include "gtest/gtest.h"
+#include <initializer_list>
+#include <memory>
+
+using namespace llvm;
+
+namespace {
+
+static std::unique_ptr<Module> parseIR(LLVMContext &C, const char *IR) {
+ SMDiagnostic Err;
+ std::unique_ptr<Module> Mod = parseAssemblyString(IR, Err, C);
+ if (!Mod)
+ Err.print(__FILE__, errs());
+ return Mod;
+}
+
+TEST(TargetTransformInfo, isOffsetFoldingLegal) {
+ LLVMInitializeX86TargetInfo();
+ LLVMInitializeX86Target();
+ LLVMInitializeX86TargetMC();
+
+ LLVMContext Ctx;
+ std::unique_ptr<Module> M = parseIR(Ctx, R"(
+
+ target triple = "x86_64-unknown-linux-gnu"
+ @Base1 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }
+ @Base2 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }
+
+ define void @Base1_bar(ptr %this) {
+ ret void
+ }
+
+ declare i32 @Base1_foo(ptr)
+ )");
+
+ std::string Error;
+ const Target *T = TargetRegistry::lookupTarget(M->getTargetTriple(), Error);
+ std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
+ M->getTargetTriple(), "generic", "", TargetOptions(), std::nullopt,
+ std::nullopt, CodeGenOptLevel::Default));
+ ASSERT_FALSE(TM->isPositionIndependent());
+
+ Function *Func = M->getFunction("Base1_bar");
+
+ TargetTransformInfo TTI = TM->getTargetTransformInfo(*Func);
+
+ // Base1 is dso_local.
+ EXPECT_TRUE(TTI.isOffsetFoldingLegal(M->getNamedValue("Base1")));
+
+ // Base2 is not dso_local.
+ EXPECT_FALSE(TTI.isOffsetFoldingLegal(M->getNamedValue("Base2")));
+}
+} // namespace
>From 8560fe3ac859e64545272f5a16fe4dcb023b05f9 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 21 Aug 2024 21:53:53 -0700
Subject: [PATCH 2/3] update comment
---
llvm/include/llvm/Analysis/TargetTransformInfo.h | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 4956fc2c4303b0..4183c4f90f40fd 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -665,10 +665,11 @@ class TargetTransformInfo {
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) const;
- /// Return true if folding a constant offset with the given GlobalValue
- /// (representing a GlobalAddress) is legal. It is frequently not legal
- /// in PIC relocation models.
- /// Caller must guarantee that GlobalValue represents a global address.
+ /// Given a global address represented by a global value and a constant
+ /// offset relative to it, return true if the constant offset is foldable into
+ /// the global value when lowering the global address. The constant is usually
+ /// not foldable in PIC relocation models. Caller must guarantee that GlobalValue
+ /// represents a global address.
bool isOffsetFoldingLegal(const GlobalValue *GV) const;
/// Targets can implement their own combinations for target-specific
>From d86204bc27e0f0f405428a60f0f88d82812dbdb4 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 21 Aug 2024 23:20:24 -0700
Subject: [PATCH 3/3] [TypeProf][IndirectCallProm]Tweak max number of vtables
by considering vtable address point materialization cost
---
.../Instrumentation/IndirectCallPromotion.cpp | 35 ++++-
.../Other/new-pm-thinlto-postlink-defaults.ll | 2 +-
.../new-pm-thinlto-postlink-pgo-defaults.ll | 2 +-
...-pm-thinlto-postlink-samplepgo-defaults.ll | 2 +-
.../new-pm-thinlto-prelink-pgo-defaults.ll | 2 +-
.../icp_vtable_address_point_cost.ll | 139 ++++++++++++++++++
6 files changed, 172 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/Transforms/PGOProfile/icp_vtable_address_point_cost.ll
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 0d1f506986379d..72497b5f71a413 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
@@ -303,6 +304,8 @@ class IndirectCallPromoter {
Function &F;
Module &M;
+ const TargetTransformInfo &TTI;
+
ProfileSummaryInfo *PSI = nullptr;
// Symtab that maps indirect call profile values to function names and
@@ -369,6 +372,9 @@ class IndirectCallPromoter {
ArrayRef<PromotionCandidate> Candidates,
uint64_t TotalCount);
+ bool addressPointLoweringCostComparable(
+ const VTableGUIDCountsMap &VTableGUIDCounts) const;
+
// Given an indirect callsite and the list of function candidates, compute
// the following vtable information in output parameters and return vtable
// pointer if type profiles exist.
@@ -391,12 +397,12 @@ class IndirectCallPromoter {
public:
IndirectCallPromoter(
- Function &Func, Module &M, ProfileSummaryInfo *PSI,
- InstrProfSymtab *Symtab, bool SamplePGO,
+ Function &Func, Module &M, const TargetTransformInfo &TTI,
+ ProfileSummaryInfo *PSI, InstrProfSymtab *Symtab, bool SamplePGO,
const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
OptimizationRemarkEmitter &ORE)
- : F(Func), M(M), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO),
+ : F(Func), M(M), TTI(TTI), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO),
VirtualCSInfo(VirtualCSInfo),
VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
IndirectCallPromoter(const IndirectCallPromoter &) = delete;
@@ -833,6 +839,18 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
return Changed;
}
+bool IndirectCallPromoter::addressPointLoweringCostComparable(
+ const VTableGUIDCountsMap &VTableGUIDAndCounts) const {
+ for (auto &[GUID, Count] : VTableGUIDAndCounts) {
+ GlobalVariable *VTable = Symtab->getGlobalVariable(GUID);
+ assert(VTable != nullptr &&
+ "guaranteed by IndirectCallPromoter::computeVTableInfos");
+ if (!TTI.isOffsetFoldingLegal(VTable))
+ return false;
+ }
+ return true;
+}
+
// TODO: Return false if the function addressing and vtable load instructions
// cannot sink to indirect fallback.
bool IndirectCallPromoter::isProfitableToCompareVTables(
@@ -877,8 +895,12 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
// chain for the subsequent candidates. Set its value to 1 for non-last
// candidate and allow option to override it for the last candidate.
int MaxNumVTable = 1;
- if (I == CandidateSize - 1)
- MaxNumVTable = ICPMaxNumVTableLastCandidate;
+ if (I == CandidateSize - 1) {
+ if (addressPointLoweringCostComparable(VTableGUIDAndCounts))
+ MaxNumVTable = 2;
+ if (ICPMaxNumVTableLastCandidate.getNumOccurrences())
+ MaxNumVTable = ICPMaxNumVTableLastCandidate;
+ }
if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got "
@@ -991,8 +1013,9 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
auto &FAM =
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
- IndirectCallPromoter CallPromoter(F, M, PSI, &Symtab, SamplePGO,
+ IndirectCallPromoter CallPromoter(F, M, TTI, PSI, &Symtab, SamplePGO,
VirtualCSInfo,
VTableAddressPointOffsetVal, ORE);
bool FuncChanged = CallPromoter.processFunction(PSI);
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index 064362eabbf839..60921edd7cee23 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -43,12 +43,12 @@
; CHECK-POSTLINK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-POSTLINK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-POSTLINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+; CHECK-POSTLINK-O-NEXT: Running analysis: TargetIRAnalysis on foo
; CHECK-O-NEXT: Running pass: OpenMPOptPass
; CHECK-POSTLINK-O-NEXT: Running pass: LowerTypeTestsPass
; CHECK-O-NEXT: Running pass: IPSCCPPass
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index 19a44867e434ac..30ec5dc3b9e23d 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -28,12 +28,12 @@
; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+; CHECK-O-NEXT: Running analysis: TargetIRAnalysis on foo
; CHECK-O-NEXT: Running pass: OpenMPOptPass
; CHECK-O-NEXT: Running pass: LowerTypeTestsPass
; CHECK-O-NEXT: Running pass: IPSCCPPass
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index e5aebc4850e6db..21364b6a66f872 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -37,12 +37,12 @@
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: OpenMPOptPass
; CHECK-O-NEXT: Running pass: LowerTypeTestsPass
; CHECK-O-NEXT: Running pass: IPSCCPPass
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
index cb49cbd22d60c0..153856112f20e7 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -84,6 +84,7 @@
; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion on
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo
+; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
@@ -106,7 +107,6 @@
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
; CHECK-O-NEXT: Running analysis: AAManager
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_address_point_cost.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_address_point_cost.ll
new file mode 100644
index 00000000000000..95a95d4ac7c669
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_address_point_cost.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@Base1 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0
+@Base2 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo] }, !type !2
+@Base3 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo] }, !type !6
+
+@Derived1 = constant { [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived1_bar] }, !type !1, !type !2, !type !3
+@Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo], [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived2_bar] }, !type !4, !type !5, !type !6, !type !7
+@Derived3 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0, !type !8
+
+; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Derived1_bar with count 600 out of 1600, sink 2 instruction(s) and compare 1 vtable(s): {Derived1}
+; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Derived2_bar with count 500 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {Derived2}
+; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Base1_bar with count 400 out of 500, sink 2 instruction(s) and compare 2 vtable(s): {Derived3, Base1}
+
+define void @test(ptr %d) {
+; VTABLE-CMP-LABEL: define void @test(
+; VTABLE-CMP-SAME: ptr [[D:%.*]]) {
+; VTABLE-CMP-NEXT: [[ENTRY:.*:]]
+; VTABLE-CMP-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF9:![0-9]+]]
+; VTABLE-CMP-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"Base1")
+; VTABLE-CMP-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; VTABLE-CMP-NEXT: [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived1, i32 40)
+; VTABLE-CMP-NEXT: br i1 [[TMP1]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]]
+; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG]]:
+; VTABLE-CMP-NEXT: call void @Derived1_bar(ptr [[D]])
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP:.*]]
+; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT]]:
+; VTABLE-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived2, i32 64)
+; VTABLE-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]]
+; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG1]]:
+; VTABLE-CMP-NEXT: call void @Derived2_bar(ptr [[D]])
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3:.*]]
+; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT2]]:
+; VTABLE-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Base1, i32 16)
+; VTABLE-CMP-NEXT: [[TMP4:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived3, i32 16)
+; VTABLE-CMP-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
+; VTABLE-CMP-NEXT: br i1 [[TMP5]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]]
+; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG4]]:
+; VTABLE-CMP-NEXT: call void @Base1_bar(ptr [[D]])
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6:.*]]
+; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT5]]:
+; VTABLE-CMP-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; VTABLE-CMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VFN]], align 8
+; VTABLE-CMP-NEXT: call void [[TMP6]](ptr [[D]])
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6]]
+; VTABLE-CMP: [[IF_END_ICP6]]:
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3]]
+; VTABLE-CMP: [[IF_END_ICP3]]:
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP]]
+; VTABLE-CMP: [[IF_END_ICP]]:
+; VTABLE-CMP-NEXT: ret void
+;
+; FUNC-CMP-LABEL: define void @test(
+; FUNC-CMP-SAME: ptr [[D:%.*]]) {
+; FUNC-CMP-NEXT: [[ENTRY:.*:]]
+; FUNC-CMP-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF9:![0-9]+]]
+; FUNC-CMP-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"Base1")
+; FUNC-CMP-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; FUNC-CMP-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; FUNC-CMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
+; FUNC-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @Derived1_bar
+; FUNC-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]]
+; FUNC-CMP: [[IF_TRUE_DIRECT_TARG]]:
+; FUNC-CMP-NEXT: call void @Derived1_bar(ptr [[D]])
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP:.*]]
+; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT]]:
+; FUNC-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[TMP1]], @Derived2_bar
+; FUNC-CMP-NEXT: br i1 [[TMP3]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]]
+; FUNC-CMP: [[IF_TRUE_DIRECT_TARG1]]:
+; FUNC-CMP-NEXT: call void @Derived2_bar(ptr [[D]])
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP3:.*]]
+; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT2]]:
+; FUNC-CMP-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @Base1_bar
+; FUNC-CMP-NEXT: br i1 [[TMP4]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]]
+; FUNC-CMP: [[IF_TRUE_DIRECT_TARG4]]:
+; FUNC-CMP-NEXT: call void @Base1_bar(ptr [[D]])
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP6:.*]]
+; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT5]]:
+; FUNC-CMP-NEXT: call void [[TMP1]](ptr [[D]])
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP6]]
+; FUNC-CMP: [[IF_END_ICP6]]:
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP3]]
+; FUNC-CMP: [[IF_END_ICP3]]:
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP]]
+; FUNC-CMP: [[IF_END_ICP]]:
+; FUNC-CMP-NEXT: ret void
+;
+entry:
+ %vtable = load ptr, ptr %d, !prof !9
+ %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"Base1")
+ tail call void @llvm.assume(i1 %0)
+ %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+ %1 = load ptr, ptr %vfn
+ call void %1(ptr %d), !prof !10
+ ret void
+}
+
+define void @Base1_bar(ptr %this) {
+ ret void
+}
+
+define void @Derived1_bar(ptr %this) {
+ ret void
+}
+
+define void @Derived2_bar(ptr %this) {
+ ret void
+}
+
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+declare i32 @Base2_foo(ptr)
+declare i32 @Base1_foo(ptr)
+declare void @Base3_foo(ptr)
+
+!0 = !{i64 16, !"Base1"}
+!1 = !{i64 40, !"Base1"}
+!2 = !{i64 16, !"Base2"}
+!3 = !{i64 16, !"Derived1"}
+!4 = !{i64 64, !"Base1"}
+!5 = !{i64 40, !"Base2"}
+!6 = !{i64 16, !"Base3"}
+!7 = !{i64 16, !"Derived2"}
+!8 = !{i64 16, !"Derived3"}
+!9 = !{!"VP", i32 2, i64 1600, i64 -4123858694673519054, i64 600, i64 -7211198353767973908, i64 500, i64 -3574436251470806727, i64 200, i64 6288809125658696740, i64 200, i64 12345678, i64 100}
+!10 = !{!"VP", i32 0, i64 1600, i64 3827408714133779784, i64 600, i64 5837445539218476403, i64 500, i64 -9064955852395570538, i64 400, i64 56781234, i64 100}
+;.
+; VTABLE-COMMON: [[PROF9]] = !{!"VP", i32 2, i64 100, i64 12345678, i64 100}
+; VTABLE-COMMON: [[PROF10]] = !{!"branch_weights", i32 600, i32 1000}
+; VTABLE-COMMON: [[PROF11]] = !{!"branch_weights", i32 500, i32 500}
+; VTABLE-COMMON: [[PROF12]] = !{!"branch_weights", i32 400, i32 100}
+
More information about the llvm-commits
mailing list