[llvm] [CGData] Global Merge Functions (PR #112671)
Zhaoxuan Jiang via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 17 01:25:40 PST 2024
================
@@ -0,0 +1,672 @@
+//===---- GlobalMergeFunctions.cpp - Global merge functions -------*- C++ -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements the global merge function pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalMergeFunctions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/CGData/CodeGenData.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/StructuralHash.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#define DEBUG_TYPE "global-merge-func"
+
+using namespace llvm;
+using namespace llvm::support;
+
+static cl::opt<bool> DisableCGDataForMerging(
+ "disable-cgdata-for-merging", cl::Hidden,
+ cl::desc("Disable codegen data for function merging. Local "
+ "merging is still enabled within a module."),
+ cl::init(false));
+
+STATISTIC(NumMismatchedFunctionHash,
+ "Number of mismatched function hash for global merge function");
+STATISTIC(NumMismatchedInstCount,
+ "Number of mismatched instruction count for global merge function");
+STATISTIC(NumMismatchedConstHash,
+ "Number of mismatched const hash for global merge function");
+STATISTIC(NumMismatchedModuleId,
+ "Number of mismatched Module Id for global merge function");
+STATISTIC(NumMergedFunctions,
+ "Number of functions that are actually merged using function hash");
+STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
+STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed");
+STATISTIC(NumEligibleFunctions, "Number of functions that are eligible");
+
+/// Returns true if the \OpIdx operand of \p CI is the callee operand.
+static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) {
+ return &CI->getCalledOperandUse() == &CI->getOperandUse(OpIdx);
+}
+
+static bool canParameterizeCallOperand(const CallBase *CI, unsigned OpIdx) {
+ if (CI->isInlineAsm())
+ return false;
+ Function *Callee = CI->getCalledOperand()
+ ? dyn_cast_or_null<Function>(
+ CI->getCalledOperand()->stripPointerCasts())
+ : nullptr;
+ if (Callee) {
+ if (Callee->isIntrinsic())
+ return false;
+ auto Name = Callee->getName();
+ // objc_msgSend stubs must be called, and can't have their address taken.
+ if (Name.starts_with("objc_msgSend$"))
+ return false;
+ // Calls to dtrace probes must generate unique patchpoints.
+ if (Name.starts_with("__dtrace"))
+ return false;
+ }
+ if (isCalleeOperand(CI, OpIdx) &&
+ CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value()) {
+ // The operand is the callee and it has already been signed. Ignore this
+ // because we cannot add another ptrauth bundle to the call instruction.
+ return false;
+ }
+ return true;
+}
+
+/// Returns true if function \p F is eligible for merging.
+bool isEligibleFunction(Function *F) {
+ if (F->isDeclaration())
+ return false;
+
+ if (F->hasFnAttribute(llvm::Attribute::NoMerge) ||
+ F->hasFnAttribute(llvm::Attribute::AlwaysInline))
+ return false;
+
+ if (F->hasAvailableExternallyLinkage())
+ return false;
+
+ if (F->getFunctionType()->isVarArg())
+ return false;
+
+ if (F->getCallingConv() == CallingConv::SwiftTail)
+ return false;
+
+ // If function contains callsites with musttail, if we merge
+ // it, the merged function will have the musttail callsite, but
+ // the number of parameters can change, thus the parameter count
+ // of the callsite will mismatch with the function itself.
+ for (const BasicBlock &BB : *F) {
+ for (const Instruction &I : BB) {
+ const auto *CB = dyn_cast<CallBase>(&I);
+ if (CB && CB->isMustTailCall())
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool isEligibleInstrunctionForConstantSharing(const Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::Call:
+ case Instruction::Invoke:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
+ assert(OpIdx < I->getNumOperands() && "Invalid operand index");
+
+ if (!isEligibleInstrunctionForConstantSharing(I))
+ return false;
+
+ if (!isa<Constant>(I->getOperand(OpIdx)))
+ return false;
+
+ if (const auto *CI = dyn_cast<CallBase>(I))
+ return canParameterizeCallOperand(CI, OpIdx);
+
+ return true;
+}
+
+static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
+ Type *SrcTy = V->getType();
+ if (SrcTy->isStructTy()) {
+ assert(DestTy->isStructTy());
+ assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
+ Value *Result = PoisonValue::get(DestTy);
+ for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
+ Value *Element =
+ createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)),
+ DestTy->getStructElementType(I));
+
+ Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I));
+ }
+ return Result;
+ }
+ assert(!DestTy->isStructTy());
+ if (auto *SrcAT = dyn_cast<ArrayType>(SrcTy)) {
+ auto *DestAT = dyn_cast<ArrayType>(DestTy);
+ assert(DestAT);
+ assert(SrcAT->getNumElements() == DestAT->getNumElements());
+ Value *Result = UndefValue::get(DestTy);
+ for (unsigned int I = 0, E = SrcAT->getNumElements(); I < E; ++I) {
+ Value *Element =
+ createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)),
+ DestAT->getElementType());
+
+ Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I));
+ }
+ return Result;
+ }
+ assert(!DestTy->isArrayTy());
+ if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+ return Builder.CreateIntToPtr(V, DestTy);
+ if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+ return Builder.CreatePtrToInt(V, DestTy);
+ return Builder.CreateBitCast(V, DestTy);
+}
+
+void GlobalMergeFunc::analyze(Module &M) {
+ ++NumAnalyzedModues;
+ for (Function &Func : M) {
+ ++NumAnalyzedFunctions;
+ if (isEligibleFunction(&Func)) {
+ ++NumEligibleFunctions;
+
+ auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp);
+
+ // Convert the operand map to a vector for a serialization-friendly
+ // format.
+ IndexOperandHashVecType IndexOperandHashes;
----------------
nocchijiang wrote:
Should we bail out for functions without parameterizable constants here?
https://github.com/llvm/llvm-project/pull/112671
More information about the llvm-commits
mailing list