[llvm] [KernelInfo] Implement new LLVM IR pass for GPU code analysis (PR #102944)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 15 10:51:40 PDT 2024
================
@@ -0,0 +1,330 @@
+//===- KernelInfo.cpp - Kernel Analysis -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the KernelInfoPrinter class used to emit remarks about
+// function properties from a GPU kernel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/KernelInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "kernel-info"
+
+namespace {
+
+/// Data structure holding function info for kernels.
+///
+/// The class is local to this file, so it lives in an anonymous namespace to
+/// give it internal linkage.  All counters are signed 64-bit integers and
+/// start at zero.
+class KernelInfo {
+  /// Update the counters below for the instructions in \p BB and emit the
+  /// corresponding remarks via \p ORE.  \p Direction is the amount added to a
+  /// counter per matching instruction and must be 1 or -1.
+  void updateForBB(const BasicBlock &BB, int64_t Direction,
+                   OptimizationRemarkEmitter &ORE);
+
+public:
+  /// Compute the info for \p F and emit it as remarks.
+  static void emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
+                             TargetMachine *TM);
+
+  /// Whether the function has external linkage and is not a kernel function.
+  bool ExternalNotKernel = false;
+
+  /// Launch bounds.
+  SmallVector<std::pair<StringRef, int64_t>> LaunchBounds;
+
+  /// The number of alloca instructions inside the function, the number of those
+  /// with allocation sizes that cannot be determined at compile time, and the
+  /// sum of the sizes that can be.
+  ///
+  /// With the current implementation for at least some GPU archs,
+  /// AllocasDyn > 0 might not be possible, but we report AllocasDyn anyway in
+  /// case the implementation changes.
+  int64_t Allocas = 0;
+  int64_t AllocasDyn = 0;
+  int64_t AllocasStaticSizeSum = 0;
+
+  /// Number of direct/indirect calls (anything derived from CallBase).
+  int64_t DirectCalls = 0;
+  int64_t IndirectCalls = 0;
+
+  /// Number of direct calls made from this function to other functions
+  /// defined in this module.
+  int64_t DirectCallsToDefinedFunctions = 0;
+
+  /// Number of direct calls to inline assembly.
+  int64_t InlineAssemblyCalls = 0;
+
+  /// Number of calls of type InvokeInst.
+  int64_t Invokes = 0;
+
+  /// Target-specific flat address space.
+  ///
+  /// Initialized to ~0U so the member never holds an indeterminate value
+  /// before emitKernelInfo assigns the target's answer.
+  /// NOTE(review): ~0U is presumably also what
+  /// TargetTransformInfo::getFlatAddressSpace reports for targets without a
+  /// flat address space -- confirm against its documentation.
+  unsigned FlatAddrspace = ~0U;
+
+  /// Number of flat address space memory accesses (via load, store, etc.).
+  int64_t FlatAddrspaceAccesses = 0;
+};
+
+} // end anonymous namespace
+
+/// Return true if \p F carries the "kernel" function attribute.
+static bool isKernelFunction(Function &F) {
+  // TODO: Is this general enough?  Consider languages beyond OpenMP.
+  const bool HasKernelAttr = F.hasFnAttribute("kernel");
+  return HasKernelAttr;
+}
+
+/// Append a quoted identification of \p V to the remark \p R.
+///
+/// If \p V is a function with a debug-info subprogram, the subprogram's name
+/// is used (preceded by "artificial " when the subprogram is artificial).
+/// When no such name is available, \p V is printed as an operand in the
+/// context of \p M instead.  A non-empty \p Kind is emitted, followed by a
+/// space, just before the quoted name.
+static void identifyCallee(OptimizationRemark &R, const Module *M,
+                           const Value *V, StringRef Kind = "") {
+  // Holds either a function name or an asm expression.
+  SmallString<100> CalleeName;
+
+  const auto *Fn = dyn_cast<Function>(V);
+  if (auto *SP = Fn ? Fn->getSubprogram() : nullptr) {
+    if (SP->isArtificial())
+      R << "artificial ";
+    CalleeName = SP->getName();
+  }
+
+  // Fall back to the IR operand spelling when debug info gave us nothing.
+  if (CalleeName.empty()) {
+    raw_svector_ostream NameStream(CalleeName);
+    V->printAsOperand(NameStream, /*PrintType=*/false, M);
+  }
+
+  if (!Kind.empty())
+    R << Kind << " ";
+  R << "'" << CalleeName << "'";
+}
+
+/// Append "function '<name>'" for \p F to the remark \p R, using the same
+/// naming rules as identifyCallee.
+static void identifyFunction(OptimizationRemark &R, const Function &F) {
+  const Module *ParentModule = F.getParent();
+  identifyCallee(R, ParentModule, &F, /*Kind=*/"function");
+}
+
+/// Emit an "Alloca" remark describing \p Alloca, which appears in \p Caller.
+///
+/// The variable name, source location, and "artificial" flag come from the
+/// first debug declare record attached to the alloca, when there is one.
+///
+/// \param StaticSize The allocation size in bytes.  It is only printed when
+/// the alloca has a size that is known at compile time; it is ignored for
+/// dynamically sized allocas.
+static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
+                         const AllocaInst &Alloca,
+                         TypeSize::ScalarTy StaticSize) {
+  ORE.emit([&] {
+    StringRef Name;
+    DebugLoc Loc;
+    bool Artificial = false;
+    // findDVRDeclares takes a non-const pointer; the alloca is not modified
+    // here.
+    auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
+    if (!DVRs.empty()) {
+      const DbgVariableRecord &DVR = **DVRs.begin();
+      const DILocalVariable *Var = DVR.getVariable();
+      Name = Var->getName();
+      Loc = DVR.getDebugLoc();
+      Artificial = Var->isArtificial();
+    }
+    OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc),
+                         Alloca.getParent());
+    R << "in ";
+    identifyFunction(R, Caller);
+    R << ", ";
+    if (Artificial)
+      R << "artificial ";
+    if (Name.empty()) {
+      R << "unnamed alloca ";
+      if (DVRs.empty())
+        R << "(missing debug metadata) ";
+    } else {
+      R << "alloca '" << Name << "' ";
+    }
+    R << "with ";
+    // Decide static vs. dynamic from the alloca itself rather than from
+    // StaticSize != 0: a statically sized alloca can legitimately be zero
+    // bytes (e.g., a zero-length array) and must not be reported as dynamic.
+    const DataLayout &DL = Caller.getParent()->getDataLayout();
+    if (Alloca.getAllocationSize(DL))
+      R << "static size of " << utostr(StaticSize) << " bytes";
+    else
+      R << "dynamic size";
+    return R;
+  });
+}
+
+/// Emit a remark named \p RemarkKind for the call \p Call located in
+/// \p Caller.  \p CallKind is the human-readable description of the call, and
+/// the callee is identified from the call's called operand.
+static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
+                       const CallBase &Call, StringRef CallKind,
+                       StringRef RemarkKind) {
+  ORE.emit([&] {
+    const Module *ParentModule = Caller.getParent();
+    const Value *CalledOperand = Call.getCalledOperand();
+
+    OptimizationRemark Remark(DEBUG_TYPE, RemarkKind, &Call);
+    Remark << "in ";
+    identifyFunction(Remark, Caller);
+    Remark << ", " << CallKind << ", callee is ";
+    identifyCallee(Remark, ParentModule, CalledOperand);
+    return Remark;
+  });
+}
+
+/// Emit a "FlatAddrspaceAccess" remark for \p Inst, which appears in
+/// \p Caller.
+///
+/// Intrinsic calls are described by the called function's name; every other
+/// instruction is described by its opcode name.  If the instruction produces
+/// a value, its operand spelling is appended in parentheses.
+static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE,
+                                      const Function &Caller,
+                                      const Instruction &Inst) {
+  ORE.emit([&] {
+    OptimizationRemark Remark(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
+    Remark << "in ";
+    identifyFunction(Remark, Caller);
+
+    if (const auto *Intrinsic = dyn_cast<IntrinsicInst>(&Inst))
+      Remark << ", '" << Intrinsic->getCalledFunction()->getName() << "' call";
+    else
+      Remark << ", '" << Inst.getOpcodeName() << "' instruction";
+
+    // Only value-producing instructions have an operand spelling to show.
+    const bool ProducesValue = !Inst.getType()->isVoidTy();
+    if (ProducesValue) {
+      SmallString<20> ResultName;
+      raw_svector_ostream NameStream(ResultName);
+      Inst.printAsOperand(NameStream, /*PrintType=*/false, Caller.getParent());
+      Remark << " ('" << ResultName << "')";
+    }
+
+    Remark << " accesses memory in flat address space";
+    return Remark;
+  });
+}
+
+/// Update the counters for every non-debug instruction in \p BB and emit one
+/// remark per counted alloca, call, and flat address space access.
+///
+/// \param Direction Amount added to a counter for each matching instruction;
+/// must be 1 or -1.
+/// \param ORE Emitter that receives the remarks.
+void KernelInfo::updateForBB(const BasicBlock &BB, int64_t Direction,
+                             OptimizationRemarkEmitter &ORE) {
+  assert(Direction == 1 || Direction == -1);
+  const Function &F = *BB.getParent();
+  const Module &M = *F.getParent();
+  const DataLayout &DL = M.getDataLayout();
+
+  // Count Inst as a flat address space access, and emit the remark for it, if
+  // AddrSpace is the flat address space.  Returns whether it was counted.
+  auto CountIfFlat = [&](const Instruction &Inst, unsigned AddrSpace) {
+    if (AddrSpace != FlatAddrspace)
+      return false;
+    FlatAddrspaceAccesses += Direction;
+    remarkFlatAddrspaceAccess(ORE, F, Inst);
+    return true;
+  };
+
+  for (const Instruction &I : BB.instructionsWithoutDebug()) {
+    if (const auto *Alloca = dyn_cast<AllocaInst>(&I)) {
+      Allocas += Direction;
+      TypeSize::ScalarTy StaticSize = 0;
+      if (std::optional<TypeSize> Size = Alloca->getAllocationSize(DL)) {
+        StaticSize = Size->getFixedValue();
+        assert(StaticSize <= static_cast<TypeSize::ScalarTy>(
+                                 std::numeric_limits<int64_t>::max()));
+        // Convert to signed before multiplying so that a Direction of -1 is
+        // not evaluated in unsigned arithmetic.
+        AllocasStaticSizeSum += Direction * static_cast<int64_t>(StaticSize);
+      } else {
+        AllocasDyn += Direction;
+      }
+      remarkAlloca(ORE, F, *Alloca, StaticSize);
+      continue;
+    }
+
+    if (const auto *Call = dyn_cast<CallBase>(&I)) {
+      // CallKind is the human-readable description; RemarkKind is the remark
+      // name.  Both are built up piece by piece in parallel.
+      SmallString<40> CallKind;
+      SmallString<40> RemarkKind;
+      const bool IsIndirect = Call->isIndirectCall();
+      if (IsIndirect) {
+        IndirectCalls += Direction;
+        CallKind += "indirect";
+        RemarkKind += "Indirect";
+      } else {
+        DirectCalls += Direction;
+        CallKind += "direct";
+        RemarkKind += "Direct";
+      }
+      if (isa<InvokeInst>(Call)) {
+        Invokes += Direction;
+        CallKind += " invoke";
+        RemarkKind += "Invoke";
+      } else {
+        CallKind += " call";
+        RemarkKind += "Call";
+      }
+      if (!IsIndirect) {
+        if (const Function *Callee = Call->getCalledFunction()) {
+          if (!Callee->isIntrinsic() && !Callee->isDeclaration()) {
+            DirectCallsToDefinedFunctions += Direction;
+            CallKind += " to defined function";
+            RemarkKind += "ToDefinedFunction";
+          }
+        } else if (Call->isInlineAsm()) {
+          InlineAssemblyCalls += Direction;
+          CallKind += " to inline assembly";
+          RemarkKind += "ToInlineAssembly";
+        }
+      }
+      remarkCall(ORE, F, *Call, CallKind, RemarkKind);
+
+      // A memory intrinsic is counted at most once: for its destination if
+      // that is flat, otherwise for its source if it is a transfer and the
+      // source is flat.
+      if (const auto *MI = dyn_cast<AnyMemIntrinsic>(Call)) {
+        if (!CountIfFlat(I, MI->getDestAddressSpace()))
+          if (const auto *MT = dyn_cast<AnyMemTransferInst>(MI))
+            CountIfFlat(I, MT->getSourceAddressSpace());
+      }
+      continue;
+    }
+
+    if (const auto *Load = dyn_cast<LoadInst>(&I))
+      CountIfFlat(I, Load->getPointerAddressSpace());
+    else if (const auto *Store = dyn_cast<StoreInst>(&I))
+      CountIfFlat(I, Store->getPointerAddressSpace());
+    else if (const auto *RMW = dyn_cast<AtomicRMWInst>(&I))
+      CountIfFlat(I, RMW->getPointerAddressSpace());
+    else if (const auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(&I))
+      CountIfFlat(I, CmpXchg->getPointerAddressSpace());
+  }
+}
+
+/// Emit a remark named \p Name for \p F whose message reports
+/// "<Name> = <Value>".
+static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
+                           StringRef Name, int64_t Value) {
+  ORE.emit([&] {
+    OptimizationRemark Remark(DEBUG_TYPE, Name, &F);
+    Remark << "in ";
+    identifyFunction(Remark, F);
+    Remark << ", " << Name << " = " << itostr(Value);
+    return Remark;
+  });
+}
+
+/// Return the value of \p F's function attribute \p Name parsed as an
+/// integer, or std::nullopt if \p F has no such attribute.
+static std::optional<int64_t> parseFnAttrAsInteger(Function &F,
+                                                   StringRef Name) {
+  if (F.hasFnAttribute(Name))
+    return F.getFnAttributeAsParsedInteger(Name);
+  return std::nullopt;
+}
+
+void KernelInfo::emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
+ TargetMachine *TM) {
+ KernelInfo KI;
+ TargetTransformInfo &TheTTI = FAM.getResult<TargetIRAnalysis>(F);
+ KI.FlatAddrspace = TheTTI.getFlatAddressSpace();
+
+ // Record function properties.
+ KI.ExternalNotKernel = F.hasExternalLinkage() && !isKernelFunction(F);
+ for (StringRef Name : {"omp_target_num_teams", "omp_target_thread_limit"}) {
+ if (auto Val = parseFnAttrAsInteger(F, Name))
+ KI.LaunchBounds.push_back({Name, *Val});
+ }
+ TheTTI.collectKernelLaunchBounds(F, KI.LaunchBounds);
+
+ const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ for (const auto &BB : F)
+ if (DT.isReachableFromEntry(&BB))
----------------
arsenm wrote:
Why does this need to worry about unreachable blocks? This isn't doing any transforms that need to consider breaking the IR.
https://github.com/llvm/llvm-project/pull/102944
More information about the llvm-commits mailing list