[llvm] [AArch64] New pass for code layout optimizations. (PR #184434)

Ahmad Yasin via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 5 07:30:35 PDT 2026


================
@@ -0,0 +1,225 @@
+//===-- AArch64CodeLayoutOpt.cpp - Code Layout Optimizations --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass runs after instruction scheduling and employs code layout
+// optimizations for certain patterns.
+//
+// Option -aarch64-code-layout-opt is a bitmask that enables the optimization
+// for instruction pairs:
+//   Bit 0 (0x1): Enable FCMP-FCSEL code layout optimization
+//   Bit 1 (0x2): Enable CMP/CMN-CSEL code layout optimization
+//
+// The initial implementation induces function alignment to help optimize
+// code layout for the detected patterns.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-code-layout-opt"
+#define AARCH64_CODE_LAYOUT_OPT_NAME "AArch64 Code Layout Optimization"
+
+// Bitmask option for code alignment optimization:
+//   Bit 0 (0x1): Enable FCMP-FCSEL code layout optimization (requires
+//                hasFuseFCmpFCSel)
+//   Bit 1 (0x2): Enable CMP-CSEL code layout optimization,
+//                32-bit only (requires hasFuseCmpCSel)
+// The default of 0 leaves every bit clear, which disables the pass entirely
+// (runOnMachineFunction bails out immediately when this is zero).
+static cl::opt<unsigned> EnableCodeAlignment(
+    "aarch64-code-layout-opt", cl::Hidden,
+    cl::desc("Enable code alignment optimization for instruction pairs "
+             "(bitmask: bit 0 = FCMP-FCSEL, bit 1 = CMP-CSEL)"),
+    cl::init(0));
+
+// Alignment (in bytes) applied to functions that contain a detected pattern.
+// Validated eagerly via cl::callback so an invalid value aborts at
+// option-parsing time rather than mid-compilation. Note isPowerOf2_32(0) is
+// false, so 0 is rejected as well.
+static cl::opt<unsigned> FunctionAlignBytes(
+    "aarch64-code-layout-opt-align-functions", cl::Hidden,
+    cl::desc("Function alignment in bytes for code layout optimization "
+             "(must be a power of 2)"),
+    cl::init(64), cl::callback([](const unsigned &Val) {
+      if (!isPowerOf2_32(Val))
+        // Report the option's actual name; the previous message referenced
+        // the nonexistent flag "aarch64-code-layout-opt-align".
+        report_fatal_error(
+            "aarch64-code-layout-opt-align-functions must be a power of 2");
+    }));
+
+// Pass statistics (printed with -stats). The first description previously
+// read "Number of functions with aligned (...)", which was ungrammatical.
+STATISTIC(NumFunctionsAligned,
+          "Number of functions aligned (to 64 bytes by default)");
+STATISTIC(NumFcmpFcselPairsDetected,
+          "Number of FCMP-FCSEL pairs detected for alignment");
+STATISTIC(NumCmpCselPairsDetected,
+          "Number of CMP/CMN-CSEL pairs detected for alignment");
+
+namespace {
+
+// Late machine-function pass that detects layout-sensitive instruction
+// pairs (FCMP-FCSEL, CMP/CMN-CSEL) and raises function alignment to help
+// code layout for those patterns.
+class AArch64CodeLayoutOpt : public MachineFunctionPass {
+public:
+  // Pass identification, replacement for typeid.
+  static char ID;
+  AArch64CodeLayoutOpt() : MachineFunctionPass(ID) {}
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  StringRef getPassName() const override {
+    return AARCH64_CODE_LAYOUT_OPT_NAME;
+  }
+
+private:
+  // Target instruction info; set per-function in runOnMachineFunction.
+  const AArch64InstrInfo *TII = nullptr;
+
+  // Returns true if MBB contains at least one layout-sensitive pattern.
+  bool detectLayoutSensitivePattern(MachineBasicBlock *MBB);
+
+  // Applies the alignment adjustment to MF; returns true if MF was changed.
+  bool optimizeForCodeAlignment(MachineFunction &MF);
+};
+
+} // end anonymous namespace
+
+char AArch64CodeLayoutOpt::ID = 0;
+
+// Register the pass with the legacy pass manager under the CLI name
+// "aarch64-code-layout-opt"; it is neither CFG-only nor an analysis.
+INITIALIZE_PASS(AArch64CodeLayoutOpt, "aarch64-code-layout-opt",
+                AARCH64_CODE_LAYOUT_OPT_NAME, false, false)
+
+void AArch64CodeLayoutOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+  // This pass only adjusts alignment metadata; it never adds or removes
+  // blocks or edges, so CFG-based analyses remain valid.
+  AU.setPreservesCFG();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+// Factory hook used by the AArch64 target to add this pass to its codegen
+// pipeline. Ownership of the returned pass transfers to the pass manager.
+FunctionPass *llvm::createAArch64CodeLayoutOptPass() {
+  auto *Pass = new AArch64CodeLayoutOpt();
+  return Pass;
+}
+
+bool AArch64CodeLayoutOpt::runOnMachineFunction(MachineFunction &MF) {
+  if (!EnableCodeAlignment)
+    return false;
+
+  const auto *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
+  TII = Subtarget->getInstrInfo();
+
+  const unsigned Mask = EnableCodeAlignment;
+  if (!((Mask & 0x1) && Subtarget->hasFuseFCmpFCSel()) &&
----------------
ayasin-a wrote:

I opt for keeping a single bitmask flag, which provides the flexibility to select any combination of patterns. Adding an explicit flag per pattern would not scale.

https://github.com/llvm/llvm-project/pull/184434


More information about the llvm-commits mailing list