[llvm] [AMDGPU] Add backward compatibility layer for kernarg preloading (PR #119167)
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 11 14:28:58 PST 2024
================
@@ -0,0 +1,229 @@
+//===- AMDGPUPreloadKernargHeader.cpp - Preload Kernarg Header ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass handles the creation of the backwards compatibility layer
+/// for kernarg preloading. Code may be compiled with the feature enabled, while
+/// the kernel is executed on hardware without firmware support.
+///
+/// To avoid the need for recompilation, we insert a block at the beginning of
+/// the kernel that is responsible for loading the kernel arguments into SGPRs
+/// using s_load instructions which set up the registers exactly as they would
+/// be by firmware if the code were executed on a system that supported kernarg
+/// preloading.
+///
+/// This essentially allows for two entry points for the kernel. Firmware that
+/// supports the feature will automatically jump past the first 256 bytes of the
+/// program, skipping the backwards compatibility layer and directly beginning
+/// execution on the fast code path.
+///
+/// This pass should be run as late as possible, to avoid any optimization that
+/// may assume that padding is dead code or that the prologue added here is a
+/// true predecessor of the kernel entry block.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUPreloadKernargHeader.h"
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/TargetParser/TargetParser.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-preload-kernarg-header"
+
+namespace {
+
+// Bundles the parameters describing one load: a size, the register class and
+// opcode to use, and the register associated with the load.
+// NOTE(review): the units of Size are not visible in this part of the file.
+struct LoadConfig {
+  unsigned Size;
+  const TargetRegisterClass *RegClass;
+  unsigned Opcode;
+  Register LoadReg;
+
+  // Fully specified form, used when a config is returned together with a
+  // concrete register.
+  LoadConfig(unsigned S, const TargetRegisterClass *RC, unsigned Op,
+             Register Reg)
+      : Size(S), RegClass(RC), Opcode(Op), LoadReg(Reg) {}
+
+  // Register-less form for entries of the static config array; LoadReg is
+  // left as AMDGPU::NoRegister.
+  LoadConfig(unsigned S, const TargetRegisterClass *RC, unsigned Op)
+      : LoadConfig(S, RC, Op, AMDGPU::NoRegister) {}
+};
+
+// Inserts the kernarg preload backward compatibility block into a single
+// MachineFunction. Construct it with the function, then call run().
+class AMDGPUPreloadKernargHeader {
+public:
+ // Binds to MF and caches references to its subtarget, machine function info,
+ // instruction info and register info.
+ AMDGPUPreloadKernargHeader(MachineFunction &MF);
+
+ // Returns true if a backward compatibility block was inserted into the
+ // function, false if the function was left untouched.
+ bool run();
+
+private:
+ MachineFunction &MF;
+ // ST is declared ahead of TII and TRI because the constructor derives both
+ // of them from ST; keep this declaration order.
+ const GCNSubtarget &ST;
+ const SIMachineFunctionInfo &MFI;
+ const SIInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+
+ // Create a new block before the entry point to the kernel. Firmware that
+ // supports preloading kernel arguments will automatically jump past this
+ // block to the alternative kernel entry point.
+ void createBackCompatBlock();
+
+ // Add instructions to load kernel arguments into SGPRs, returns the number of
+ // s_load instructions added.
+ unsigned addBackCompatLoads(MachineBasicBlock *BackCompatMBB,
+ Register KernargSegmentPtr,
+ unsigned NumKernargPreloadSGPRs);
+};
+
+// MachineFunctionPass wrapper that runs AMDGPUPreloadKernargHeader on each
+// machine function it is given.
+class AMDGPUPreloadKernargHeaderLegacy : public MachineFunctionPass {
+public:
+  // Pass identifier handed to the MachineFunctionPass base class.
+  static char ID;
+
+  AMDGPUPreloadKernargHeaderLegacy() : MachineFunctionPass(ID) {}
+
+  // Defined out of line; forwards to AMDGPUPreloadKernargHeader::run().
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  // Human-readable name of this pass.
+  StringRef getPassName() const override {
+    return "AMDGPU Preload Kernarg Header";
+  }
+};
+
+} // end anonymous namespace
+
+// Storage for the pass identifier.
+// NOTE(review): presumably only the address of ID matters, not its value, as
+// is conventional for LLVM pass IDs - confirm.
+char AMDGPUPreloadKernargHeaderLegacy::ID = 0;
+
+// Registers the pass under DEBUG_TYPE ("amdgpu-preload-kernarg-header") with
+// the description string below.
+INITIALIZE_PASS(AMDGPUPreloadKernargHeaderLegacy, DEBUG_TYPE,
+ "AMDGPU Preload Kernarg Header", false, false)
+
+// Reference to the pass ID in the llvm namespace, so that code outside this
+// file can name the pass.
+char &llvm::AMDGPUPreloadKernargHeaderLegacyID =
+ AMDGPUPreloadKernargHeaderLegacy::ID;
+
+// Factory: returns a newly allocated AMDGPUPreloadKernargHeaderLegacy pass.
+FunctionPass *llvm::createAMDGPUPreloadKernargHeaderLegacyPass() {
+  FunctionPass *HeaderPass = new AMDGPUPreloadKernargHeaderLegacy();
+  return HeaderPass;
+}
+
+// Pass entry point: builds the implementation object for MF and reports
+// whether it changed the function.
+bool AMDGPUPreloadKernargHeaderLegacy::runOnMachineFunction(
+    MachineFunction &MF) {
+  AMDGPUPreloadKernargHeader Impl(MF);
+  return Impl.run();
+}
+
+// Caches the per-function objects the pass works with. TII and TRI are
+// obtained from ST, so they rely on ST being initialized first; members are
+// initialized in their declaration order within the class.
+AMDGPUPreloadKernargHeader::AMDGPUPreloadKernargHeader(MachineFunction &MF)
+ : MF(MF), ST(MF.getSubtarget<GCNSubtarget>()),
+ MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(*ST.getInstrInfo()),
+ TRI(*ST.getRegisterInfo()) {}
+
+// Inserts the backward compatibility block when it is needed.
+//
+// Returns true if the function was modified (the block was created), false
+// if any of the preconditions below is not met.
+bool AMDGPUPreloadKernargHeader::run() {
+  // The subtarget must support kernarg preloading at all.
+  if (!ST.hasKernargPreload())
+    return false;
+
+  // The count is unsigned, so "no preloaded SGPRs" is exactly zero.
+  if (MFI.getNumKernargPreloadedSGPRs() == 0)
+    return false;
+
+  // A function without any basic block has no entry block to prepend to.
+  if (MF.empty())
+    return false;
+
+  createBackCompatBlock();
+  return true;
+}
+
+void AMDGPUPreloadKernargHeader::createBackCompatBlock() {
+ auto KernelEntryMBB = MF.begin();
+ MachineBasicBlock *BackCompatMBB = MF.CreateMachineBasicBlock();
+ MF.insert(KernelEntryMBB, BackCompatMBB);
+ BackCompatMBB->addSuccessor(&*KernelEntryMBB);
+
+ assert(MFI.getUserSGPRInfo().hasKernargSegmentPtr());
----------------
kerbowa wrote:
done
https://github.com/llvm/llvm-project/pull/119167
More information about the llvm-commits
mailing list