[llvm] [AMDGPU][PromoteAlloca] Whole-function alloca promotion to vector (PR #84735)

Mon Mar 11 03:45:05 PDT 2024

================
@@ -225,6 +249,46 @@ FunctionPass *llvm::createAMDGPUPromoteAllocaToVector() {
   return new AMDGPUPromoteAllocaToVector();
 }
 
+void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
+    SmallVectorImpl<AllocaInst *> &Allocas) {
+  DenseMap<AllocaInst *, unsigned> Scores;
+
+  LLVM_DEBUG(dbgs() << "Before sorting allocas:\n"; for (auto *A
+                                                         : Allocas) dbgs()
+                                                    << "  " << *A << "\n";);
+
+  for (auto *Alloca : Allocas) {
+    LLVM_DEBUG(dbgs() << "Scoring: " << *Alloca << "\n");
+    unsigned &Score = Scores[Alloca];
+    // Increment score by one for each user + a bonus for users within loops.
+    //
+    // Look through GEPs for additional users.
+    SmallVector<User *, 8> WorkList(Alloca->user_begin(), Alloca->user_end());
+    while (!WorkList.empty()) {
+      auto *Inst = cast<Instruction>(WorkList.pop_back_val());
+
+      if (isa<GetElementPtrInst>(Inst)) {
+        WorkList.append(Inst->user_begin(), Inst->user_end());
+        continue;
+      }
+
+      unsigned UserScore =
+          1 + (LoopUserWeight * LI.getLoopDepth(Inst->getParent()));
+      LLVM_DEBUG(dbgs() << "  [+" << UserScore << "]:\t" << *Inst << "\n");
+      Score += UserScore;
+    }
+    LLVM_DEBUG(dbgs() << "  => Final Score:" << Score << "\n");
+  }
+
+  sort(Allocas, [&](AllocaInst *A, AllocaInst *B) {
----------------
arsenm wrote:

Should we use stable_sort to ensure the original order as a tie breaker? 

https://github.com/llvm/llvm-project/pull/84735