[llvm] [AMDGPU] Support preloading hidden kernel arguments (PR #98861)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 2 11:15:15 PDT 2024


================
@@ -64,6 +150,94 @@ class PreloadKernelArgInfo {
     NumFreeUserSGPRs -= (NumPreloadSGPRs + PaddingSGPRs);
     return true;
   }
+
+  // Try to allocate SGPRs to preload implicit kernel arguments.
+  void tryAllocImplicitArgPreloadSGPRs(uint64_t ImplicitArgsBaseOffset,
+                                       IRBuilder<> &Builder) {
+    StringRef Name = Intrinsic::getName(Intrinsic::amdgcn_implicitarg_ptr);
+    Function *ImplicitArgPtr = F.getParent()->getFunction(Name);
+    if (!ImplicitArgPtr)
+      return;
+
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    // Pair is the load and the load offset.
+    SmallVector<std::pair<LoadInst *, unsigned>, 4> ImplicitArgLoads;
+    for (auto *U : ImplicitArgPtr->users()) {
+      Instruction *CI = dyn_cast<Instruction>(U);
+      if (!CI || CI->getParent()->getParent() != &F)
+        continue;
+
+      for (auto *U : CI->users()) {
+        int64_t Offset = 0;
+        auto *Load = dyn_cast<LoadInst>(U); // Load from ImplicitArgPtr?
+        if (!Load) {
+          if (GetPointerBaseWithConstantOffset(U, Offset, DL) != CI)
+            continue;
+
+          Load = dyn_cast<LoadInst>(*U->user_begin()); // Load from GEP?
+        }
+
+        if (!Load || !Load->isSimple())
+          continue;
+
+        // FIXME: Expand to handle 64-bit implicit args and large merged loads.
+        unsigned LoadSize = Load->getType()->getScalarSizeInBits();
+        if (LoadSize != 32 && LoadSize != 16)
+          continue;
+
+        ImplicitArgLoads.push_back(std::make_pair(Load, Offset));
+      }
+    }
+
+    if (ImplicitArgLoads.empty())
+      return;
+
+    // Allocate loads in order of offset. We need to be sure that the implicit
+    // argument can actually be preloaded.
+    std::sort(ImplicitArgLoads.begin(), ImplicitArgLoads.end(),
+              [](const std::pair<LoadInst *, unsigned> &A,
+                 const std::pair<LoadInst *, unsigned> &B) {
+                return A.second < B.second;
+              });
+
+    uint64_t LastExplicitArgOffset = ImplicitArgsBaseOffset;
+    bool AddedHiddenArgsToSignature = false;
+    Function *NF = nullptr;
+    unsigned LastPreloadIndex = 0;
+    for (const auto &Load : ImplicitArgLoads) {
+      LoadInst *LoadInst = Load.first;
+      Type *LoadType = LoadInst->getType();
+      auto LoadOffset = Load.second;
+      unsigned LoadSize = DL.getTypeStoreSize(LoadType);
+      // If we fail to preload any implicit argument we know we don't have SGPRs
+      // to preload any subsequent ones with larger offsets.
+      if (!tryAllocPreloadSGPRs(LoadSize, LoadOffset + ImplicitArgsBaseOffset,
+                                LastExplicitArgOffset))
+        break;
+
+      if (!AddedHiddenArgsToSignature) {
+        NF = cloneFunctionWithPreloadImplicitArgs();
+        AddedHiddenArgsToSignature = true;
+      }
+
+      LastExplicitArgOffset = LoadOffset + LoadSize;
+      unsigned HiddenArgIndex = getHiddenArgIndexFromOffset(LoadOffset);
+      assert(NF);
+      unsigned Index = NF->arg_size() - END_HIDDEN_ARGS + HiddenArgIndex;
+      Argument *Arg = NF->getArg(Index);
+      LoadInst->replaceAllUsesWith(Arg);
+      if (Index > HiddenArgIndex)
+        LastPreloadIndex = HiddenArgIndex;
+    }
+
+    // Ensure all hidden arguments up to the final preload are also
+    // preloaded, even if some are unused.
+    for (unsigned I = 0; I <= LastPreloadIndex; ++I)
+      NF->getArg(NF->arg_size() - END_HIDDEN_ARGS + I)
+          ->addAttr(Attribute::InReg);
----------------
arsenm wrote:

This should probably be using AttrBuilder and adding the attributes all at once at the end.

https://github.com/llvm/llvm-project/pull/98861


More information about the llvm-commits mailing list