[llvm] [KernelInfo] Implement new LLVM IR pass for GPU code analysis (PR #102944)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 12 12:05:56 PDT 2024
================
@@ -0,0 +1,350 @@
+//===- KernelInfo.cpp - Kernel Analysis -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the KernelInfo, KernelInfoAnalysis, and KernelInfoPrinter
+// classes used to extract function properties from a kernel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/KernelInfo.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "kernel-info"
+
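+// Return true if F is marked as a kernel entry point (currently via the
+// "kernel" function attribute).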
+static bool isKernelFunction(Function &F) {
+ // TODO: Is this general enough? Consider languages beyond OpenMP.
+ return F.hasFnAttribute("kernel");
+}
+
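+// Append "function '<name>'" to R, prefixed with "artificial " when debug
+// info marks F as artificial.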
+static void identifyFunction(OptimizationRemark &R, const Function &F) {
+ if (auto *SubProgram = F.getSubprogram()) {
+ if (SubProgram->isArtificial())
+ R << "artificial ";
+ }
+ R << "function '" << F.getName() << "'";
+}
+
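+// Emit a remark for an alloca, reporting its name and source location when
+// debug records are available, and either its static size in bytes or that
+// its size is dynamic.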
+static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
+ const AllocaInst &Alloca,
+ TypeSize::ScalarTy StaticSize) {
+ ORE.emit([&] {
+ StringRef Name;
+ DebugLoc Loc;
+ bool Artificial = false;
+ auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
+ if (!DVRs.empty()) {
+ const DbgVariableRecord &DVR = **DVRs.begin();
+ Name = DVR.getVariable()->getName();
+ Loc = DVR.getDebugLoc();
+ Artificial = DVR.getVariable()->isArtificial();
+ }
+ OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc),
+ Alloca.getParent());
+ R << "in ";
+ identifyFunction(R, Caller);
+ R << ", ";
+ if (Artificial)
+ R << "artificial ";
+ if (Name.empty()) {
+ R << "unnamed alloca ";
+ if (DVRs.empty())
+ R << "(missing debug metadata) ";
+ } else {
+ R << "alloca '" << Name << "' ";
+ }
+ R << "with ";
+ if (StaticSize)
+ R << "static size of " << itostr(StaticSize) << " bytes";
+ else
+ R << "dynamic size";
+ return R;
+ });
+}
+
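+// Emit a remark of the given kind for a call or invoke, naming the callee
+// when it is a known function.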
+static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
+ const CallBase &Call, StringRef CallKind,
+ StringRef RemarkKind) {
+ ORE.emit([&] {
+ OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call);
+ R << "in ";
+ identifyFunction(R, Caller);
+ R << ", " << CallKind;
+ if (const Function *Callee =
+ dyn_cast_or_null<Function>(Call.getCalledOperand())) {
+ R << ", callee is";
+ StringRef Name = Callee->getName();
+ if (auto *SubProgram = Callee->getSubprogram()) {
+ if (SubProgram->isArtificial())
+ R << " artificial";
+ }
+ if (!Name.empty())
+ R << " '" << Name << "'";
+ else
+ R << " with unknown name";
+ }
+ return R;
+ });
+}
+
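+// Emit a remark for an instruction or intrinsic call that accesses memory in
+// addrspace(0).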
+static void remarkAddrspaceZeroAccess(OptimizationRemarkEmitter &ORE,
+ const Function &Caller,
+ const Instruction &Inst) {
+ ORE.emit([&] {
+ OptimizationRemark R(DEBUG_TYPE, "AddrspaceZeroAccess", &Inst);
+ R << "in ";
+ identifyFunction(R, Caller);
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst)) {
+ R << ", '" << II->getCalledFunction()->getName() << "' call";
+ } else {
+ R << ", '" << Inst.getOpcodeName() << "' instruction";
+ }
+ if (Inst.hasName())
+ R << " ('%" << Inst.getName() << "')";
+ R << " accesses memory in addrspace(0)";
+ return R;
+ });
+}
+
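+// Walk the instructions in BB, updating each property count by Direction (+1
+// to add the block's contribution, -1 to remove it) and emitting remarks for
+// the items found.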
+void KernelInfo::updateForBB(const BasicBlock &BB, int64_t Direction,
+ OptimizationRemarkEmitter &ORE) {
+ assert(Direction == 1 || Direction == -1);
+ const Function &F = *BB.getParent();
+ const Module &M = *F.getParent();
+ const DataLayout &DL = M.getDataLayout();
+ for (const Instruction &I : BB.instructionsWithoutDebug()) {
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&I)) {
+ Allocas += Direction;
+ TypeSize::ScalarTy StaticSize = 0;
+ if (std::optional<TypeSize> Size = Alloca->getAllocationSize(DL)) {
+ StaticSize = Size->getFixedValue();
+ assert(StaticSize <= std::numeric_limits<int64_t>::max());
+ AllocasStaticSizeSum += Direction * StaticSize;
+ } else {
+ AllocasDyn += Direction;
+ }
+ remarkAlloca(ORE, F, *Alloca, StaticSize);
+ } else if (const CallBase *Call = dyn_cast<CallBase>(&I)) {
+ std::string CallKind;
+ std::string RemarkKind;
+ if (Call->isIndirectCall()) {
+ IndirectCalls += Direction;
+ CallKind += "indirect";
+ RemarkKind += "Indirect";
+ } else {
+ DirectCalls += Direction;
+ CallKind += "direct";
+ RemarkKind += "Direct";
+ }
+ if (isa<InvokeInst>(Call)) {
+ Invokes += Direction;
+ CallKind += " invoke";
+ RemarkKind += "Invoke";
+ } else {
+ CallKind += " call";
+ RemarkKind += "Call";
+ }
+ if (!Call->isIndirectCall()) {
+ if (const Function *Callee = Call->getCalledFunction()) {
+ if (!Callee->isIntrinsic() && !Callee->isDeclaration()) {
+ DirectCallsToDefinedFunctions += Direction;
+ CallKind += " to defined function";
+ RemarkKind += "ToDefinedFunction";
+ }
+ }
+ }
+ remarkCall(ORE, F, *Call, CallKind, RemarkKind);
+ if (const AnyMemIntrinsic *MI = dyn_cast<AnyMemIntrinsic>(Call)) {
+ if (MI->getDestAddressSpace() == 0) {
+ AddrspaceZeroAccesses += Direction;
+ remarkAddrspaceZeroAccess(ORE, F, I);
+ } else if (const AnyMemTransferInst *MT =
+ dyn_cast<AnyMemTransferInst>(MI)) {
+ if (MT->getSourceAddressSpace() == 0) {
+ AddrspaceZeroAccesses += Direction;
+ remarkAddrspaceZeroAccess(ORE, F, I);
+ }
+ }
+ }
+ } else if (const LoadInst *Load = dyn_cast<LoadInst>(&I)) {
+ if (Load->getPointerAddressSpace() == 0) {
+ AddrspaceZeroAccesses += Direction;
+ remarkAddrspaceZeroAccess(ORE, F, I);
+ }
+ } else if (const StoreInst *Store = dyn_cast<StoreInst>(&I)) {
+ if (Store->getPointerAddressSpace() == 0) {
+ AddrspaceZeroAccesses += Direction;
+ remarkAddrspaceZeroAccess(ORE, F, I);
+ }
+ } else if (const AtomicRMWInst *At = dyn_cast<AtomicRMWInst>(&I)) {
+ if (At->getPointerAddressSpace() == 0) {
+ AddrspaceZeroAccesses += Direction;
+ remarkAddrspaceZeroAccess(ORE, F, I);
+ }
+ } else if (const AtomicCmpXchgInst *At = dyn_cast<AtomicCmpXchgInst>(&I)) {
+ if (At->getPointerAddressSpace() == 0) {
+ AddrspaceZeroAccesses += Direction;
+ remarkAddrspaceZeroAccess(ORE, F, I);
+ }
+ }
+ }
+}
+
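+// Emit a remark reporting the value of a single named function property.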
+static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
+ StringRef Name, int64_t Value) {
+ ORE.emit([&] {
+ OptimizationRemark R(DEBUG_TYPE, Name, &F);
+ R << "in ";
+ identifyFunction(R, F);
+ R << ", " << Name << " = " << itostr(Value);
+ return R;
+ });
+}
+
+static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
+ StringRef Name, std::optional<int64_t> Value) {
+ if (!Value)
+ return;
+ remarkProperty(ORE, F, Name, Value.value());
+}
+
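+// Parse the string function attribute Name as up to NumFields comma-separated
+// integers. Fields that are absent remain std::nullopt; malformed or extra
+// fields are reported as errors.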
+static std::vector<std::optional<int64_t>>
+parseFnAttrAsIntegerFields(Function &F, StringRef Name, unsigned NumFields) {
+ std::vector<std::optional<int64_t>> Result(NumFields);
+ Attribute A = F.getFnAttribute(Name);
+ if (!A.isStringAttribute())
+ return Result;
+ StringRef Rest = A.getValueAsString();
+ for (unsigned I = 0; I < NumFields; ++I) {
+ StringRef Field;
+ std::tie(Field, Rest) = Rest.split(',');
+ if (Field.empty())
+ break;
+ int64_t Val;
+ if (Field.getAsInteger(0, Val)) {
+ F.getContext().emitError("cannot parse integer in attribute '" + Name +
+ "': " + Field);
+ break;
+ }
+ Result[I] = Val;
+ }
+ if (!Rest.empty())
+ F.getContext().emitError("too many fields in attribute " + Name);
+ return Result;
+}
+
+static std::optional<int64_t> parseFnAttrAsInteger(Function &F,
+ StringRef Name) {
+ return parseFnAttrAsIntegerFields(F, Name, 1)[0];
+}
+
+// TODO: This nearly duplicates the same function in OMPIRBuilder.cpp. Can we
+// share?
+static MDNode *getNVPTXMDNode(Function &F, StringRef Name) {
+ Module &M = *F.getParent();
+ NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations");
+ if (!MD)
+ return nullptr;
+ for (auto *Op : MD->operands()) {
+ if (Op->getNumOperands() != 3)
+ continue;
+ auto *KernelOp = dyn_cast<ConstantAsMetadata>(Op->getOperand(0));
+ if (!KernelOp || KernelOp->getValue() != &F)
+ continue;
+ auto *Prop = dyn_cast<MDString>(Op->getOperand(1));
+ if (!Prop || Prop->getString() != Name)
+ continue;
+ return Op;
+ }
+ return nullptr;
+}
+
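+// Return the integer value recorded for F under the given nvvm.annotations
+// property name, if any.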
+static std::optional<int64_t> parseNVPTXMDNodeAsInteger(Function &F,
+ StringRef Name) {
+ std::optional<int64_t> Result;
+ if (MDNode *ExistingOp = getNVPTXMDNode(F, Name)) {
+ auto *Op = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
+ Result = cast<ConstantInt>(Op->getValue())->getZExtValue();
+ }
+ return Result;
+}
+
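+// Collect the properties of F that KernelInfo reports. For non-GPU targets,
+// the result is returned with IsValid left false.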
+KernelInfo KernelInfo::getKernelInfo(Function &F,
+ FunctionAnalysisManager &FAM) {
+ KernelInfo KI;
+ // Only analyze modules for GPUs.
+ // TODO: This would be more maintainable if there were an isGPU predicate.
+ const std::string &TT = F.getParent()->getTargetTriple();
+ llvm::Triple T(TT);
+ if (!T.isAMDGPU() && !T.isNVPTX())
+ return KI;
+ KI.IsValid = true;
+
+ // Record function properties.
+ KI.ExternalNotKernel = F.hasExternalLinkage() && !isKernelFunction(F);
+ KI.OmpTargetNumTeams = parseFnAttrAsInteger(F, "omp_target_num_teams");
+ KI.OmpTargetThreadLimit = parseFnAttrAsInteger(F, "omp_target_thread_limit");
+ auto AmdgpuMaxNumWorkgroups =
+ parseFnAttrAsIntegerFields(F, "amdgpu-max-num-workgroups", 3);
+ KI.AmdgpuMaxNumWorkgroupsX = AmdgpuMaxNumWorkgroups[0];
+ KI.AmdgpuMaxNumWorkgroupsY = AmdgpuMaxNumWorkgroups[1];
+ KI.AmdgpuMaxNumWorkgroupsZ = AmdgpuMaxNumWorkgroups[2];
+ auto AmdgpuFlatWorkGroupSize =
+ parseFnAttrAsIntegerFields(F, "amdgpu-flat-work-group-size", 2);
+ KI.AmdgpuFlatWorkGroupSizeMin = AmdgpuFlatWorkGroupSize[0];
+ KI.AmdgpuFlatWorkGroupSizeMax = AmdgpuFlatWorkGroupSize[1];
+ auto AmdgpuWavesPerEu =
+ parseFnAttrAsIntegerFields(F, "amdgpu-waves-per-eu", 2);
----------------
arsenm wrote:
Don't we already report information on this in the pass remarks in the AsmPrinter?
https://github.com/llvm/llvm-project/pull/102944
More information about the llvm-commits mailing list