[llvm] [Hexagon] Add HVX caller-save remark pass for call-site diagnostics (PR #189188)

via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 28 12:22:30 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-hexagon

Author: Brian Cain (androm3da)

<details>
<summary>Changes</summary>

Add a new MachineFunctionPass (HexagonHVXSaveRemark) that emits optimization analysis remarks when HVX vector registers must be saved and restored around function calls.  All HVX registers are caller-saved (Section 5.3 of the Hexagon ABI), so any HVX value live across a call requires a save/restore pair on the stack. Each HVX vector is 64 or 128 bytes, making this overhead expensive.

The pass exits early unless remarks are requested (-Rpass-analysis=hexagon-hvx-save) and HVX is enabled.  A byte threshold (default 1024, tunable via -hexagon-hvx-save-threshold) suppresses remarks for functions whose HVX-sized spill slots total fewer bytes than the threshold.  The remarks help programmers identify call sites where inlining, hoisting, or sinking the call could reduce the save/restore cost.

---
Full diff: https://github.com/llvm/llvm-project/pull/189188.diff


5 Files Affected:

- (modified) llvm/lib/Target/Hexagon/CMakeLists.txt (+1) 
- (modified) llvm/lib/Target/Hexagon/Hexagon.h (+2) 
- (added) llvm/lib/Target/Hexagon/HexagonHVXSaveRemark.cpp (+116) 
- (modified) llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp (+3) 
- (added) llvm/test/CodeGen/Hexagon/hvx-save-remarks.ll (+46) 


``````````diff
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index b4716693c3dc9..872820ca26068 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -42,6 +42,7 @@ add_llvm_target(HexagonCodeGen
   HexagonGlobalScheduler.cpp
   HexagonLiveVariables.cpp
   HexagonHardwareLoops.cpp
+  HexagonHVXSaveRemark.cpp
   HexagonHazardRecognizer.cpp
   HexagonInstrInfo.cpp
   HexagonISelDAGToDAG.cpp
diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h
index 1db2326b274dc..8ecd3520fc660 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/llvm/lib/Target/Hexagon/Hexagon.h
@@ -43,6 +43,7 @@ void initializeHexagonGenMemAbsolutePass(PassRegistry &);
 void initializeHexagonGenMuxPass(PassRegistry &);
 void initializeHexagonGlobalSchedulerPass(PassRegistry &);
 void initializeHexagonHardwareLoopsPass(PassRegistry &);
+void initializeHexagonHVXSaveRemarkPass(PassRegistry &);
 void initializeHexagonLiveVariablesPass(PassRegistry &);
 void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
 void initializeHexagonLoopAlignPass(PassRegistry &);
@@ -97,6 +98,7 @@ FunctionPass *createHexagonGenMux();
 FunctionPass *createHexagonGenPredicate();
 FunctionPass *createHexagonGlobalScheduler();
 FunctionPass *createHexagonHardwareLoops();
+FunctionPass *createHexagonHVXSaveRemark();
 FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
                                    CodeGenOptLevel OptLevel);
 FunctionPass *createHexagonLoopAlign();
diff --git a/llvm/lib/Target/Hexagon/HexagonHVXSaveRemark.cpp b/llvm/lib/Target/Hexagon/HexagonHVXSaveRemark.cpp
new file mode 100644
index 0000000000000..2631d4a05c16a
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonHVXSaveRemark.cpp
@@ -0,0 +1,116 @@
+//===- HexagonHVXSaveRemark.cpp - Remark on HVX saves around calls --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Diagnostic pass that emits optimization remarks when HVX vector registers
+// must be saved and restored around function calls.  All HVX registers are
+// caller-saved (Section 5.3 of the Hexagon ABI), so every HVX value that is
+// live across a call requires a save/restore pair on the stack.  Each HVX
+// vector is 64 or 128 bytes (depending on the mode), making this overhead
+// expensive.  The remarks help programmers identify call sites where inlining,
+// hoisting, or sinking the call could reduce the save/restore cost.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonSubtarget.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-hvx-save"
+
+static cl::opt<unsigned> HVXSaveThreshold(
+    "hexagon-hvx-save-threshold", cl::Hidden, cl::init(1024),
+    cl::desc("Minimum bytes of HVX caller-saves to trigger a remark"));
+
+namespace {
+
+struct HexagonHVXSaveRemark : public MachineFunctionPass {
+  static char ID;
+
+  HexagonHVXSaveRemark() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    MachineOptimizationRemarkEmitter MORE(MF, nullptr);
+    if (!MORE.allowExtraAnalysis(DEBUG_TYPE))
+      return false;
+
+    const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
+    if (!HST.useHVXOps())
+      return false;
+
+    // Heuristic: count every spill slot whose size is one HVX vector or one
+    // vector pair. NOTE(review): slots are not tied to any particular call.
+    const MachineFrameInfo &MFI = MF.getFrameInfo();
+    unsigned HVXLen = HST.getVectorLength();
+    unsigned NumVecSaves = 0;
+    unsigned NumPairSaves = 0;
+
+    for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I < E;
+         ++I) {
+      if (!MFI.isSpillSlotObjectIndex(I))
+        continue;
+      int64_t Size = MFI.getObjectSize(I);
+      if (Size == (int64_t)HVXLen)
+        ++NumVecSaves;
+      else if (Size == (int64_t)(2 * HVXLen))
+        ++NumPairSaves;
+    }
+
+    unsigned TotalSaves = NumVecSaves + NumPairSaves;
+    unsigned TotalBytes = NumVecSaves * HVXLen + NumPairSaves * 2 * HVXLen;
+
+    if (TotalBytes < HVXSaveThreshold)
+      return false;
+
+    // Emit one remark per call, each carrying the function-wide totals.
+    for (const MachineBasicBlock &MBB : MF) {
+      for (const MachineInstr &MI : MBB) {
+        if (!MI.isCall())
+          continue;
+
+        MORE.emit([&]() {
+          using namespace ore;
+          MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "HVXSaveAroundCall",
+                                              MI.getDebugLoc(), &MBB);
+          R << NV("NumSaves", TotalSaves) << " HVX caller-saved register(s) ("
+            << NV("TotalBytes", TotalBytes)
+            << " bytes) saved and restored around call";
+          return R;
+        });
+      }
+    }
+
+    return false;
+  }
+
+  StringRef getPassName() const override { return "Hexagon HVX Save Remarks"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+char HexagonHVXSaveRemark::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(HexagonHVXSaveRemark, DEBUG_TYPE, "Hexagon HVX Save Remarks",
+                false, false)
+
+FunctionPass *llvm::createHexagonHVXSaveRemark() {
+  return new HexagonHVXSaveRemark();
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index e1af6ec8bb116..ca4bccc482a89 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -198,6 +198,7 @@ LLVMInitializeHexagonTarget() {
   initializeHexagonGlobalSchedulerPass(PR);
   initializeHexagonLiveVariablesPass(PR);
   initializeHexagonHardwareLoopsPass(PR);
+  initializeHexagonHVXSaveRemarkPass(PR);
   initializeHexagonLoopIdiomRecognizeLegacyPassPass(PR);
   initializeHexagonNewValueJumpPass(PR);
   initializeHexagonOptAddrModePass(PR);
@@ -490,6 +491,8 @@ void HexagonPassConfig::addPreEmitPass() {
   if (EnableVectorPrint)
     addPass(createHexagonVectorPrint());
 
+  addPass(createHexagonHVXSaveRemark());
+
   // Add CFI instructions if necessary.
   addPass(createHexagonCallFrameInformation());
 }
diff --git a/llvm/test/CodeGen/Hexagon/hvx-save-remarks.ll b/llvm/test/CodeGen/Hexagon/hvx-save-remarks.ll
new file mode 100644
index 0000000000000..572625c4c5d31
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/hvx-save-remarks.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mtriple=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \
+; RUN:   -pass-remarks-analysis=hexagon-hvx-save %s -o /dev/null 2>&1 \
+; RUN:   | FileCheck %s
+
+;; Test that the HVX save remark pass reports caller-save costs around calls.
+;; All HVX registers are caller-saved, so any HVX value live across a call
+;; requires a save/restore pair on the stack.  The default threshold is 1024
+;; bytes, so we need at least 8 x 128-byte vectors live across the call.
+
+; CHECK: remark: {{.*}} HVX caller-saved register(s) ({{[0-9]+}} bytes) saved and restored around call
+; CHECK-NOT: remark:
+
+declare void @bar()
+
+;; 8 HVX vectors live across a call (8 x 128 = 1024 bytes) -- meets threshold.
+define void @test_hvx_save_around_call(ptr %p0, ptr %p1, ptr %p2, ptr %p3,
+                                       ptr %p4, ptr %p5, ptr %p6, ptr %p7) {
+entry:
+  %v0 = load <32 x i32>, ptr %p0, align 128
+  %v1 = load <32 x i32>, ptr %p1, align 128
+  %v2 = load <32 x i32>, ptr %p2, align 128
+  %v3 = load <32 x i32>, ptr %p3, align 128
+  %v4 = load <32 x i32>, ptr %p4, align 128
+  %v5 = load <32 x i32>, ptr %p5, align 128
+  %v6 = load <32 x i32>, ptr %p6, align 128
+  %v7 = load <32 x i32>, ptr %p7, align 128
+  call void @bar()
+  store <32 x i32> %v0, ptr %p0, align 128
+  store <32 x i32> %v1, ptr %p1, align 128
+  store <32 x i32> %v2, ptr %p2, align 128
+  store <32 x i32> %v3, ptr %p3, align 128
+  store <32 x i32> %v4, ptr %p4, align 128
+  store <32 x i32> %v5, ptr %p5, align 128
+  store <32 x i32> %v6, ptr %p6, align 128
+  store <32 x i32> %v7, ptr %p7, align 128
+  ret void
+}
+
+;; Single HVX vector live across call (128 bytes) -- below threshold.
+define void @test_below_threshold(ptr %p) {
+entry:
+  %v = load <32 x i32>, ptr %p, align 128
+  call void @bar()
+  store <32 x i32> %v, ptr %p, align 128
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/189188


More information about the llvm-commits mailing list