[llvm] [Hexagon] Generate absolute-set load/store instructions. (PR #82034)

Sumanth Gundapaneni via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 21 08:19:14 PST 2024


https://github.com/sgundapa updated https://github.com/llvm/llvm-project/pull/82034

>From d1ca749830d50a243b7f9d6418bd30e1e1522dcb Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sgundapa at quicinc.com>
Date: Fri, 16 Feb 2024 11:48:58 -0800
Subject: [PATCH] [Hexagon] Generate absolute-set load/store instructions.

The optimization finds loads/stores of a specific form and
translates the first load/store to an absolute-set form, thereby
optimizing out the transfer and eliminating the constant extenders.
---
 llvm/lib/Target/Hexagon/CMakeLists.txt        |   1 +
 .../Target/Hexagon/HexagonGenMemAbsolute.cpp  | 274 ++++++++++++++++++
 .../Target/Hexagon/HexagonTargetMachine.cpp   |   9 +
 .../CodeGen/Hexagon/load-const-extend-opt.ll  |  68 +++++
 .../CodeGen/Hexagon/store-const-extend-opt.ll |  72 +++++
 5 files changed, 424 insertions(+)
 create mode 100644 llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
 create mode 100644 llvm/test/CodeGen/Hexagon/load-const-extend-opt.ll
 create mode 100644 llvm/test/CodeGen/Hexagon/store-const-extend-opt.ll

diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index 76f99b4d3ec580..753f3dcc88e19b 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -33,6 +33,7 @@ add_llvm_target(HexagonCodeGen
   HexagonFrameLowering.cpp
   HexagonGenExtract.cpp
   HexagonGenInsert.cpp
+  HexagonGenMemAbsolute.cpp
   HexagonGenMux.cpp
   HexagonGenPredicate.cpp
   HexagonHardwareLoops.cpp
diff --git a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
new file mode 100644
index 00000000000000..afd49631943f26
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
@@ -0,0 +1,274 @@
+//===--- HexagonGenMemAbsolute.cpp - Generate Load/Store Set Absolute ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This pass traverses through all the basic blocks in a function and converts
+// an indexed load/store with offset "0" to an absolute-set load/store
+// instruction as long as the use of the register in the new instruction
+// dominates the rest of the uses and there are more than 2 uses.
+
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "hexagon-abs"
+
+using namespace llvm;
+
+STATISTIC(HexagonNumLoadAbsConversions,
+          "Number of Load instructions converted to absolute-set form");
+STATISTIC(HexagonNumStoreAbsConversions,
+          "Number of Store instructions converted to absolute-set form");
+
+namespace llvm {
+FunctionPass *createHexagonGenMemAbsolute();
+void initializeHexagonGenMemAbsolutePass(PassRegistry &Registry);
+} // namespace llvm
+
+namespace {
+/// Converts constant-base indexed loads/stores to their absolute-set form.
+class HexagonGenMemAbsolute : public MachineFunctionPass {
+  const HexagonInstrInfo *TII = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
+
+public:
+  static char ID;
+  HexagonGenMemAbsolute() : MachineFunctionPass(ID) {
+    initializeHexagonGenMemAbsolutePass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+    return "Hexagon Generate Load/Store Set Absolute Address Instruction";
+  }
+  // Requires the machine dominator tree and declares it preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+  }
+
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+  static bool isValidIndexedLoad(int &Opcode, int &NewOpcode);
+  static bool isValidIndexedStore(int &Opcode, int &NewOpcode);
+};
+} // namespace
+
+char HexagonGenMemAbsolute::ID = 0;
+
+INITIALIZE_PASS(HexagonGenMemAbsolute, "hexagon-gen-load-absolute",
+                "Hexagon Generate Load/Store Set Absolute Address Instruction",
+                false, false)
+/// Folds a constant transfer (CONST32/A2_tfrsi) into the zero-offset load or
+/// store using it as an absolute-set instruction; returns true if Fn changed.
+bool HexagonGenMemAbsolute::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  TII = Fn.getSubtarget<HexagonSubtarget>().getInstrInfo();
+  MRI = &Fn.getRegInfo();
+  TRI = Fn.getRegInfo().getTargetRegisterInfo();
+
+  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+  bool Changed = false;
+  // Loop over all of the basic blocks
+  for (MachineBasicBlock &MBB : Fn) {
+    // Traverse the basic block. MII is advanced before MI is inspected so
+    // that erasing MI at the end of an iteration cannot invalidate it.
+    for (MachineBasicBlock::iterator MII = MBB.begin(); MII != MBB.end();) {
+      MachineInstr *MI = &*MII;
+      ++MII;
+      int Opc = MI->getOpcode();
+      if (Opc != Hexagon::CONST32 && Opc != Hexagon::A2_tfrsi)
+        continue;
+
+      // Physical registers can be redefined, so only virtual ones qualify.
+      const MachineOperand &MO = MI->getOperand(0);
+      if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
+        continue;
+
+      Register DstReg = MO.getReg();
+      if (MRI->use_nodbg_empty(DstReg))
+        continue;
+
+      using use_iterator = MachineRegisterInfo::use_nodbg_iterator;
+      use_iterator NextUseMI = MRI->use_nodbg_begin(DstReg);
+
+      MachineInstr *NextMI = NextUseMI->getParent();
+      int NextOpc = NextMI->getOpcode();
+      int NewOpc = 0;
+      bool IsLoad = isValidIndexedLoad(NextOpc, NewOpc);
+      if (!IsLoad && !isValidIndexedStore(NextOpc, NewOpc))
+        continue;
+
+      // Base and Offset positions for load and store instructions
+      // Load R(dest), R(base), Imm -> R(dest) = mem(R(base) + Imm)
+      // Store R(base), Imm, R (src) -> mem(R(base) + Imm) = R(src)
+      unsigned BaseRegPos, ImmPos;
+      if (!TII->getBaseAndOffsetPosition(*NextMI, BaseRegPos, ImmPos))
+        continue;
+      const unsigned RegPos = IsLoad ? 0 : 2;
+
+      bool IsGlobal = MI->getOperand(1).isGlobal();
+      if (!MI->getOperand(1).isImm() && !IsGlobal)
+        continue;
+
+      // Bail out if the target cannot describe the access as base + offset.
+      const MachineOperand *BaseOp = nullptr;
+      int64_t Offset = 0;
+      bool Scalable = false;
+      if (!TII->getMemOperandWithOffset(*NextMI, BaseOp, Offset, Scalable, TRI))
+        continue;
+
+      // Ensure BaseOp is non-null and register type.
+      if (!BaseOp || !BaseOp->isReg() || Scalable)
+        continue;
+
+      Register BaseReg = BaseOp->getReg();
+      if ((DstReg != BaseReg) || (Offset != 0))
+        continue;
+
+      const MachineOperand &MO0 = NextMI->getOperand(RegPos);
+      if (!MO0.isReg())
+        continue;
+
+      Register LoadStoreReg = MO0.getReg();
+      // Store: Bail out if the src and base are same (def and use on same
+      // register).
+      if (LoadStoreReg == BaseReg)
+        continue;
+
+      // Insert the absolute-set instruction "I" only if the use of the
+      // BaseReg in "I" dominates the rest of the uses of BaseReg and if
+      // there are more than 2 uses of this BaseReg.
+      bool Dominates = true;
+      unsigned Counter = 0;
+      for (use_iterator I = NextUseMI, E = MRI->use_nodbg_end(); I != E; ++I) {
+        ++Counter;
+        if (!MDT.dominates(NextMI, I->getParent()))
+          Dominates = false;
+      }
+
+      if ((!Dominates) || (Counter < 3))
+        continue;
+
+      // If we reach here, we have met all the conditions required for the
+      // replacement of the absolute instruction.
+      LLVM_DEBUG({
+        dbgs() << "Found a pair of instructions for absolute-set "
+               << (IsLoad ? "load" : "store") << "\n";
+        dbgs() << *MI;
+        dbgs() << *NextMI;
+      });
+      MachineBasicBlock *ParentBlock = NextMI->getParent();
+      MachineInstrBuilder MIB;
+      if (IsLoad) { // Insert absolute-set load instruction
+        ++HexagonNumLoadAbsConversions;
+        MIB = BuildMI(*ParentBlock, NextMI, NextMI->getDebugLoc(),
+                      TII->get(NewOpc), LoadStoreReg)
+                  .addReg(DstReg, RegState::Define);
+      } else { // Insert absolute-set store instruction
+        ++HexagonNumStoreAbsConversions;
+        MIB = BuildMI(*ParentBlock, NextMI, NextMI->getDebugLoc(),
+                      TII->get(NewOpc), DstReg);
+      }
+
+      const MachineOperand &ImmOperand = MI->getOperand(1);
+      if (IsGlobal)
+        MIB.addGlobalAddress(ImmOperand.getGlobal(), ImmOperand.getOffset(),
+                             ImmOperand.getTargetFlags());
+      else
+        MIB.addImm(ImmOperand.getImm());
+
+      if (IsLoad)
+        MIB->getOperand(0).setSubReg(MO0.getSubReg());
+      else
+        MIB.addReg(LoadStoreReg, 0, MO0.getSubReg());
+      // Carry the memory operands over so the access stays fully described.
+      MIB.cloneMemRefs(*NextMI);
+
+      LLVM_DEBUG(dbgs() << "Replaced with " << *MIB << "\n");
+      // Erase the replaced pair; step past NextMI first if MII points at it.
+      if (MII != MBB.end() && &*MII == NextMI)
+        ++MII;
+      MI->eraseFromParent();
+      NextMI->eraseFromParent();
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
+bool HexagonGenMemAbsolute::isValidIndexedLoad(int &Opc, int &NewOpc) {
+  // Map a base+offset (_io) load opcode to its absolute-set (_ap) opcode.
+  // NewOpc is written only when such a mapping exists.
+  switch (Opc) {
+  case Hexagon::L2_loadrb_io:
+    NewOpc = Hexagon::L4_loadrb_ap;
+    return true;
+  case Hexagon::L2_loadrh_io:
+    NewOpc = Hexagon::L4_loadrh_ap;
+    return true;
+  case Hexagon::L2_loadri_io:
+    NewOpc = Hexagon::L4_loadri_ap;
+    return true;
+  case Hexagon::L2_loadrd_io:
+    NewOpc = Hexagon::L4_loadrd_ap;
+    return true;
+  case Hexagon::L2_loadruh_io:
+    NewOpc = Hexagon::L4_loadruh_ap;
+    return true;
+  case Hexagon::L2_loadrub_io:
+    NewOpc = Hexagon::L4_loadrub_ap;
+    return true;
+  default:
+    break;
+  }
+  // Every other opcode is rejected and NewOpc is left untouched.
+  return false;
+}
+
+bool HexagonGenMemAbsolute::isValidIndexedStore(int &Opc, int &NewOpc) {
+  // Map a base+offset (_io) store opcode to its absolute-set (_ap) opcode.
+  // NewOpc is written only when such a mapping exists.
+  switch (Opc) {
+  case Hexagon::S2_storerd_io:
+    NewOpc = Hexagon::S4_storerd_ap;
+    return true;
+  case Hexagon::S2_storeri_io:
+    NewOpc = Hexagon::S4_storeri_ap;
+    return true;
+  case Hexagon::S2_storerh_io:
+    NewOpc = Hexagon::S4_storerh_ap;
+    return true;
+  case Hexagon::S2_storerb_io:
+    NewOpc = Hexagon::S4_storerb_ap;
+    return true;
+  default:
+    break;
+  }
+  // Every other opcode is rejected and NewOpc is left untouched.
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+/// Returns a new HexagonGenMemAbsolute pass (see addPreRegAlloc for its use).
+FunctionPass *llvm::createHexagonGenMemAbsolute() {
+  return new HexagonGenMemAbsolute();
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 7d4b420071c4a3..49ef547d65fb29 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -92,6 +92,10 @@ static cl::opt<bool>
 static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden,
   cl::desc("Disable splitting double registers"));
 
+static cl::opt<bool>
+    EnableGenMemAbs("hexagon-mem-abs", cl::init(true), cl::Hidden,
+                    cl::desc("Generate absolute set instructions"));
+
 static cl::opt<bool> EnableBitSimplify("hexagon-bit", cl::init(true),
   cl::Hidden, cl::desc("Bit simplification"));
 
@@ -151,6 +155,7 @@ namespace llvm {
   void initializeHexagonCopyToCombinePass(PassRegistry&);
   void initializeHexagonEarlyIfConversionPass(PassRegistry&);
   void initializeHexagonExpandCondsetsPass(PassRegistry&);
+  void initializeHexagonGenMemAbsolutePass(PassRegistry &);
   void initializeHexagonGenMuxPass(PassRegistry&);
   void initializeHexagonHardwareLoopsPass(PassRegistry&);
   void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
@@ -177,6 +182,7 @@ namespace llvm {
   FunctionPass *createHexagonFixupHwLoops();
   FunctionPass *createHexagonGenExtract();
   FunctionPass *createHexagonGenInsert();
+  FunctionPass *createHexagonGenMemAbsolute();
   FunctionPass *createHexagonGenMux();
   FunctionPass *createHexagonGenPredicate();
   FunctionPass *createHexagonHardwareLoops();
@@ -211,6 +217,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() {
   initializeHexagonConstPropagationPass(PR);
   initializeHexagonCopyToCombinePass(PR);
   initializeHexagonEarlyIfConversionPass(PR);
+  initializeHexagonGenMemAbsolutePass(PR);
   initializeHexagonGenMuxPass(PR);
   initializeHexagonHardwareLoopsPass(PR);
   initializeHexagonLoopIdiomRecognizeLegacyPassPass(PR);
@@ -413,6 +420,8 @@ void HexagonPassConfig::addPreRegAlloc() {
       insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID);
     if (!DisableStoreWidening)
       addPass(createHexagonStoreWidening());
+    if (EnableGenMemAbs)
+      addPass(createHexagonGenMemAbsolute());
     if (!DisableHardwareLoops)
       addPass(createHexagonHardwareLoops());
   }
diff --git a/llvm/test/CodeGen/Hexagon/load-const-extend-opt.ll b/llvm/test/CodeGen/Hexagon/load-const-extend-opt.ll
new file mode 100644
index 00000000000000..6f9e83c23ab326
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/load-const-extend-opt.ll
@@ -0,0 +1,68 @@
+; RUN: llc -march=hexagon -O3 -hexagon-small-data-threshold=0 < %s | FileCheck %s
+; This test checks that when there are more than 2 uses of a constant address,
+; the value is moved into a register and all instances of the constant are
+; replaced with that register. The GenMemAbsolute pass generates an
+; absolute-set instruction if there are more than 2 uses of this register.
+
+; CHECK: loadi32_3
+; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
+; CHECK-NOT: r{{[0-9]+}} = memw(r{{[0-9]+}}+#0)
+; CHECK:r{{[0-9]+}} = memw(r[[REG:[0-9]+]]=##441652)
+; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
+; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
+; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
+; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
+; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
+
+define void @loadi32_3() #0 {
+entry:
+  %0 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
+  %1 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
+  %2 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
+  ret void
+}
+
+; CHECK: loadi32_2
+; CHECK-NOT: r{{[0-9]+}} = ##441652
+; CHECK: r{{[0-9]+}} = memw(##441652)
+; CHECK: r{{[0-9]+}} = memw(##441652)
+
+define void @loadi32_2() #0 {
+entry:
+  %0 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
+  %1 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
+  ret void
+}
+
+; CHECK: loadi32_abs_global_3
+; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
+; CHECK-NOT: r{{[0-9]+}} = memw(r{{[0-9]+}}+#0)
+; CHECK:r{{[0-9]+}} = memw(r[[REG:[0-9]+]]=##globalInt)
+; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
+; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
+; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
+; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
+; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
+
+ at globalInt = external global i32, align 8
+define void @loadi32_abs_global_3() #0 {
+entry:
+  %0 = load volatile i32, ptr @globalInt, align 4
+  %1 = load volatile i32, ptr @globalInt, align 4
+  %2 = load volatile i32, ptr @globalInt, align 4
+  ret void
+}
+
+; CHECK: loadi32_abs_global_2
+; CHECK-NOT:r[[REG:[0-9]+]] = ##globalInt
+; CHECK:r{{[0-9]+}} = memw(##globalInt)
+; CHECK:r{{[0-9]+}} = memw(##globalInt)
+
+define void @loadi32_abs_global_2() #0 {
+entry:
+  %0 = load volatile i32, ptr @globalInt, align 4
+  %1 = load volatile i32, ptr @globalInt, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/Hexagon/store-const-extend-opt.ll b/llvm/test/CodeGen/Hexagon/store-const-extend-opt.ll
new file mode 100644
index 00000000000000..dccf176f3bd07f
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/store-const-extend-opt.ll
@@ -0,0 +1,72 @@
+; RUN: llc -march=hexagon -O3 -hexagon-small-data-threshold=0 < %s | FileCheck %s
+; This test checks that when there are more than 2 uses of a constant address,
+; the value is moved into a register and all instances of the constant are
+; replaced with that register. The GenMemAbsolute pass generates an
+; absolute-set instruction if there are more than 2 uses of this register.
+
+; CHECK: storetrunci32_3
+; CHECK-NOT: memw(##441652) = r{{[0-9]+}}
+; CHECK-NOT: memw(r{{[0-9]+}}+#0) = r{{[0-9]+}}
+; CHECK:memw(r[[REG:[0-9]+]]=##441652) = r{{[0-9]+}}
+; CHECK-NOT: memw(##441652) = r{{[0-9]+}}
+; CHECK:memw(r[[REG]]+#0) = r{{[0-9]+}}
+; CHECK-NOT: memw(##441652) = r{{[0-9]+}}
+; CHECK:memw(r[[REG]]+#0) = r{{[0-9]+}}
+; CHECK-NOT: memw(##441652) = r{{[0-9]+}}
+
+define void @storetrunci32_3(i64 %descr_addr, i32 %rpm_or_sys, i32 %kkr) #0 {
+entry:
+  %conv = trunc i64 %descr_addr to i32
+  store volatile i32 %conv, ptr inttoptr (i32 441652 to ptr), align 4
+  store volatile i32 %rpm_or_sys, ptr inttoptr (i32 441652 to ptr), align 4
+  store volatile i32 %kkr, ptr inttoptr (i32 441652 to ptr), align 4
+  ret void
+}
+
+; CHECK: storetrunci32_2
+; CHECK-NOT: r{{[0-9]+}} = ##441652
+; CHECK: memw(##441652) = r{{[0-9]+}}
+; CHECK: memw(##441652) = r{{[0-9]+}}
+
+define void @storetrunci32_2(i64 %descr_addr, i32 %rpm_or_sys) #0 {
+entry:
+  %conv = trunc i64 %descr_addr to i32
+  store volatile i32 %conv, ptr inttoptr (i32 441652 to ptr), align 4
+  store volatile i32 %rpm_or_sys, ptr inttoptr (i32 441652 to ptr), align 4
+  ret void
+}
+
+; CHECK: storetrunci32_abs_global_3
+; CHECK-NOT: memw(##globalInt) = r{{[0-9]+}}
+; CHECK-NOT: memw(r{{[0-9]+}}+#0) = r{{[0-9]+}}
+; CHECK:memw(r[[REG:[0-9]+]]=##globalInt) = r{{[0-9]+}}
+; CHECK-NOT: memw(##globalInt) = r{{[0-9]+}}
+; CHECK:memw(r[[REG]]+#0) = r{{[0-9]+}}
+; CHECK-NOT: memw(##globalInt) = r{{[0-9]+}}
+; CHECK:memw(r[[REG]]+#0) = r{{[0-9]+}}
+; CHECK-NOT: memw(##globalInt) = r{{[0-9]+}}
+
+ at globalInt = external global i32, align 8
+define void @storetrunci32_abs_global_3(i64 %descr_addr, i32 %rpm_or_sys, i32 %kkr) #0 {
+entry:
+  %conv = trunc i64 %descr_addr to i32
+  store volatile i32 %conv, ptr @globalInt, align 4
+  store volatile i32 %rpm_or_sys, ptr @globalInt, align 4
+  store volatile i32 %kkr, ptr @globalInt, align 4
+  ret void
+}
+
+; CHECK: storetrunci32_abs_global_2
+; CHECK-NOT:r[[REG:[0-9]+]] = ##globalInt
+; CHECK:memw(##globalInt) = r{{[0-9]+}}
+; CHECK:memw(##globalInt) = r{{[0-9]+}}
+
+define void @storetrunci32_abs_global_2(i64 %descr_addr, i32 %rpm_or_sys) #0 {
+entry:
+  %conv = trunc i64 %descr_addr to i32
+  store volatile i32 %conv, ptr @globalInt, align 4
+  store volatile i32 %rpm_or_sys, ptr @globalInt, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }



More information about the llvm-commits mailing list