[llvm] [Hexagon] Generate absolute-set load/store instructions. (PR #82034)
Sumanth Gundapaneni via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 16 12:47:12 PST 2024
https://github.com/sgundapa updated https://github.com/llvm/llvm-project/pull/82034
>From b402e17143bac8e3c07810a5b5ffa29b04a945b6 Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sgundapa at quicinc.com>
Date: Fri, 16 Feb 2024 11:48:58 -0800
Subject: [PATCH] [Hexagon] Generate absolute-set load/store instructions.
The optimization finds loads/stores of a specific form and translates
the first load/store to an absolute-set form, thereby optimizing out
the transfer and eliminating the constant extenders.
---
llvm/lib/Target/Hexagon/CMakeLists.txt | 1 +
.../Target/Hexagon/HexagonGenMemAbsolute.cpp | 274 ++++++++++++++++++
.../Target/Hexagon/HexagonTargetMachine.cpp | 9 +
.../CodeGen/Hexagon/load-const-extend-opt.ll | 68 +++++
.../CodeGen/Hexagon/store-const-extend-opt.ll | 72 +++++
5 files changed, 424 insertions(+)
create mode 100644 llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
create mode 100644 llvm/test/CodeGen/Hexagon/load-const-extend-opt.ll
create mode 100644 llvm/test/CodeGen/Hexagon/store-const-extend-opt.ll
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index 76f99b4d3ec580..753f3dcc88e19b 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -33,6 +33,7 @@ add_llvm_target(HexagonCodeGen
HexagonFrameLowering.cpp
HexagonGenExtract.cpp
HexagonGenInsert.cpp
+ HexagonGenMemAbsolute.cpp
HexagonGenMux.cpp
HexagonGenPredicate.cpp
HexagonHardwareLoops.cpp
diff --git a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
new file mode 100644
index 00000000000000..afd49631943f26
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
@@ -0,0 +1,274 @@
+//===--- HexagonGenMemAbsolute.cpp - Generate Load/Store Set Absolute ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This pass traverses through all the basic blocks in a function and converts
+// an indexed load/store with offset "0" to an absolute-set load/store
+// instruction as long as the use of the register in the new instruction
+// dominates the rest of the uses and there are more than 2 uses.
+
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "hexagon-abs"
+
+using namespace llvm;
+
+// Counters bumped by runOnMachineFunction each time a load (respectively a
+// store) is rewritten into its absolute-set form.
+STATISTIC(HexagonNumLoadAbsConversions,
+ "Number of Load instructions converted to absolute-set form");
+STATISTIC(HexagonNumStoreAbsConversions,
+ "Number of Store instructions converted to absolute-set form");
+
+// Forward declarations for this pass: the factory defined at the bottom of
+// this file, and the registration hook that the pass constructor calls.
+namespace llvm {
+FunctionPass *createHexagonGenMemAbsolute();
+void initializeHexagonGenMemAbsolutePass(PassRegistry &Registry);
+} // namespace llvm
+
+namespace {
+
+/// Machine function pass that merges a constant-address transfer with the
+/// indexed load/store using it into a single absolute-set load/store.
+class HexagonGenMemAbsolute : public MachineFunctionPass {
+  // Per-function handles; runOnMachineFunction assigns all three before use.
+  const HexagonInstrInfo *TII = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
+
+public:
+  static char ID;
+
+  HexagonGenMemAbsolute() : MachineFunctionPass(ID) {
+    initializeHexagonGenMemAbsolutePass(*PassRegistry::getPassRegistry());
+  }
+
+  /// Human-readable pass name.
+  StringRef getPassName() const override {
+    return "Hexagon Generate Load/Store Set Absolute Address Instruction";
+  }
+
+  /// The pass requires the machine dominator tree and declares it preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+  }
+
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+  /// Returns true and sets \p NewOpcode to the absolute-set opcode when
+  /// \p Opcode is a supported indexed load; returns false otherwise.
+  static bool isValidIndexedLoad(int &Opcode, int &NewOpcode);
+
+  /// Returns true and sets \p NewOpcode to the absolute-set opcode when
+  /// \p Opcode is a supported indexed store; returns false otherwise.
+  static bool isValidIndexedStore(int &Opcode, int &NewOpcode);
+};
+} // namespace
+
+// Definition of the static pass identifier that the constructor hands to
+// MachineFunctionPass.
+char HexagonGenMemAbsolute::ID = 0;
+
+// Registers the pass with its argument string and description.
+// NOTE(review): the two trailing flags are presumably "CFG only" and
+// "is analysis", both false here -- confirm against the macro definition.
+INITIALIZE_PASS(HexagonGenMemAbsolute, "hexagon-gen-load-absolute",
+ "Hexagon Generate Load/Store Set Absolute Address Instruction",
+ false, false)
+
+/// Rewrite "Rb = ##addr; ... mem(Rb + #0) ..." into an absolute-set access.
+///
+/// For every CONST32/A2_tfrsi that materializes an immediate or a global
+/// address, the first entry on the destination register's non-debug use list
+/// is examined. If that use is a supported indexed load/store addressing the
+/// register with offset 0, if it dominates every other use of the register,
+/// and if the register has at least three uses, the transfer and the memory
+/// instruction are replaced by a single absolute-set load/store that performs
+/// the access and defines the register in one instruction.
+///
+/// \param Fn the machine function to transform.
+/// \returns true if at least one load/store was converted, false otherwise.
+bool HexagonGenMemAbsolute::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipFunction(Fn.getFunction()))
+    return false;
+
+  TII = Fn.getSubtarget<HexagonSubtarget>().getInstrInfo();
+  MRI = &Fn.getRegInfo();
+  TRI = Fn.getRegInfo().getTargetRegisterInfo();
+
+  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+  bool Changed = false;
+
+  for (MachineBasicBlock &MBB : Fn) {
+    for (MachineBasicBlock::iterator MII = MBB.begin(); MII != MBB.end();) {
+      MachineInstr *MI = &*MII;
+      // Advance first so that every bail-out below can simply `continue`.
+      ++MII;
+
+      int Opc = MI->getOpcode();
+      if (Opc != Hexagon::CONST32 && Opc != Hexagon::A2_tfrsi)
+        continue;
+
+      const MachineOperand &MO = MI->getOperand(0);
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+
+      // Only an immediate or a global address can be folded into the
+      // absolute-set form.
+      const MachineOperand &ImmOperand = MI->getOperand(1);
+      const bool IsGlobal = ImmOperand.isGlobal();
+      if (!ImmOperand.isImm() && !IsGlobal)
+        continue;
+
+      Register DstReg = MO.getReg();
+      if (MRI->use_nodbg_empty(DstReg))
+        continue;
+
+      using use_iterator = MachineRegisterInfo::use_nodbg_iterator;
+      use_iterator FirstUse = MRI->use_nodbg_begin(DstReg);
+
+      MachineInstr *NextMI = FirstUse->getParent();
+      int NextOpc = NextMI->getOpcode();
+      int NewOpc = 0;
+      const bool IsLoad = isValidIndexedLoad(NextOpc, NewOpc);
+      if (!IsLoad && !isValidIndexedStore(NextOpc, NewOpc))
+        continue;
+
+      // Base and Offset positions for load and store instructions
+      // Load R(dest), R(base), Imm -> R(dest) = mem(R(base) + Imm)
+      // Store R(base), Imm, R (src) -> mem(R(base) + Imm) = R(src)
+      // The positions themselves are not needed; the query only filters out
+      // instructions for which they cannot be determined.
+      unsigned BaseRegPos, ImmPos;
+      if (!TII->getBaseAndOffsetPosition(*NextMI, BaseRegPos, ImmPos))
+        continue;
+      const unsigned RegPos = IsLoad ? 0 : 2;
+
+      const MachineOperand *BaseOp = nullptr;
+      int64_t Offset = 0;
+      bool Scalable = false;
+      // Offset and Scalable are only meaningful when the query succeeds.
+      if (!TII->getMemOperandWithOffset(*NextMI, BaseOp, Offset, Scalable,
+                                        TRI))
+        continue;
+
+      // Ensure BaseOp is non-null and register type.
+      if (!BaseOp || !BaseOp->isReg())
+        continue;
+
+      if (Scalable)
+        continue;
+
+      Register BaseReg = BaseOp->getReg();
+      if (DstReg != BaseReg || Offset != 0)
+        continue;
+
+      const MachineOperand &MO0 = NextMI->getOperand(RegPos);
+      if (!MO0.isReg())
+        continue;
+
+      Register LoadStoreReg = MO0.getReg();
+
+      // Store: Bail out if the src and base are same (def and use on same
+      // register).
+      if (LoadStoreReg == BaseReg)
+        continue;
+
+      // Insert the absolute-set instruction "I" only if the use of the
+      // BaseReg in "I" dominates the rest of the uses of BaseReg and if
+      // there are more than 2 uses of this BaseReg.
+      bool Dominates = true;
+      unsigned NumUses = 0;
+      for (use_iterator I = FirstUse, E = MRI->use_nodbg_end(); I != E; ++I) {
+        ++NumUses;
+        if (!MDT.dominates(NextMI, I->getParent())) {
+          // One non-dominated use already rules the candidate out.
+          Dominates = false;
+          break;
+        }
+      }
+
+      if (!Dominates || NumUses < 3)
+        continue;
+
+      // If we reach here, we have met all the conditions required for the
+      // replacement of the absolute instruction.
+      LLVM_DEBUG({
+        dbgs() << "Found a pair of instructions for absolute-set "
+               << (IsLoad ? "load" : "store") << "\n";
+        dbgs() << *MI;
+        dbgs() << *NextMI;
+      });
+      MachineBasicBlock *ParentBlock = NextMI->getParent();
+      MachineInstrBuilder MIB;
+      if (IsLoad) { // Insert absolute-set load instruction
+        ++HexagonNumLoadAbsConversions;
+        MIB = BuildMI(*ParentBlock, NextMI, NextMI->getDebugLoc(),
+                      TII->get(NewOpc), LoadStoreReg)
+                  .addReg(DstReg, RegState::Define);
+      } else { // Insert absolute-set store instruction
+        ++HexagonNumStoreAbsConversions;
+        MIB = BuildMI(*ParentBlock, NextMI, NextMI->getDebugLoc(),
+                      TII->get(NewOpc), DstReg);
+      }
+
+      if (IsGlobal)
+        MIB.addGlobalAddress(ImmOperand.getGlobal(), ImmOperand.getOffset(),
+                             ImmOperand.getTargetFlags());
+      else
+        MIB.addImm(ImmOperand.getImm());
+
+      if (IsLoad)
+        MIB->getOperand(0).setSubReg(MO0.getSubReg());
+      else
+        MIB.addReg(LoadStoreReg, 0, MO0.getSubReg());
+
+      // The new instruction performs the same access as NextMI, so carry
+      // over its memory operands instead of dropping them.
+      MIB.cloneMemRefs(*NextMI);
+
+      LLVM_DEBUG(dbgs() << "Replaced with " << *MIB << "\n");
+
+      // Erase the instructions that got replaced. erase() hands back the
+      // instruction that followed MI; when NextMI was adjacent to MI that is
+      // the new instruction (inserted just before NextMI), so the iterator
+      // remains valid once NextMI is gone. Resuming from it also avoids
+      // stepping before begin() when MI was the first instruction.
+      MII = MBB.erase(MI);
+      ParentBlock->erase(NextMI);
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+/// Map an indexed (base + offset) load opcode to its absolute-set variant.
+///
+/// \param Opc opcode of the candidate instruction.
+/// \param NewOpc receives the matching L4_*_ap opcode on success and is left
+///        untouched otherwise.
+/// \returns true if \p Opc is one of the supported indexed loads.
+bool HexagonGenMemAbsolute::isValidIndexedLoad(int &Opc, int &NewOpc) {
+  switch (Opc) {
+  case Hexagon::L2_loadrb_io:
+    NewOpc = Hexagon::L4_loadrb_ap;
+    return true;
+  case Hexagon::L2_loadrh_io:
+    NewOpc = Hexagon::L4_loadrh_ap;
+    return true;
+  case Hexagon::L2_loadri_io:
+    NewOpc = Hexagon::L4_loadri_ap;
+    return true;
+  case Hexagon::L2_loadrd_io:
+    NewOpc = Hexagon::L4_loadrd_ap;
+    return true;
+  case Hexagon::L2_loadruh_io:
+    NewOpc = Hexagon::L4_loadruh_ap;
+    return true;
+  case Hexagon::L2_loadrub_io:
+    NewOpc = Hexagon::L4_loadrub_ap;
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Map an indexed (base + offset) store opcode to its absolute-set variant.
+///
+/// \param Opc opcode of the candidate instruction.
+/// \param NewOpc receives the matching S4_*_ap opcode on success and is left
+///        untouched otherwise.
+/// \returns true if \p Opc is one of the supported indexed stores.
+bool HexagonGenMemAbsolute::isValidIndexedStore(int &Opc, int &NewOpc) {
+  switch (Opc) {
+  case Hexagon::S2_storerd_io:
+    NewOpc = Hexagon::S4_storerd_ap;
+    return true;
+  case Hexagon::S2_storeri_io:
+    NewOpc = Hexagon::S4_storeri_ap;
+    return true;
+  case Hexagon::S2_storerh_io:
+    NewOpc = Hexagon::S4_storerh_ap;
+    return true;
+  case Hexagon::S2_storerb_io:
+    NewOpc = Hexagon::S4_storerb_ap;
+    return true;
+  default:
+    return false;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// Returns a newly heap-allocated instance of the HexagonGenMemAbsolute pass.
+FunctionPass *llvm::createHexagonGenMemAbsolute() {
+ return new HexagonGenMemAbsolute();
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 7d4b420071c4a3..49ef547d65fb29 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -92,6 +92,10 @@ static cl::opt<bool>
static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden,
cl::desc("Disable splitting double registers"));
+static cl::opt<bool>
+ EnableGenMemAbs("hexagon-mem-abs", cl::init(true), cl::Hidden,
+ cl::desc("Generate absolute set instructions"));
+
static cl::opt<bool> EnableBitSimplify("hexagon-bit", cl::init(true),
cl::Hidden, cl::desc("Bit simplification"));
@@ -151,6 +155,7 @@ namespace llvm {
void initializeHexagonCopyToCombinePass(PassRegistry&);
void initializeHexagonEarlyIfConversionPass(PassRegistry&);
void initializeHexagonExpandCondsetsPass(PassRegistry&);
+ void initializeHexagonGenMemAbsolutePass(PassRegistry &);
void initializeHexagonGenMuxPass(PassRegistry&);
void initializeHexagonHardwareLoopsPass(PassRegistry&);
void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
@@ -177,6 +182,7 @@ namespace llvm {
FunctionPass *createHexagonFixupHwLoops();
FunctionPass *createHexagonGenExtract();
FunctionPass *createHexagonGenInsert();
+ FunctionPass *createHexagonGenMemAbsolute();
FunctionPass *createHexagonGenMux();
FunctionPass *createHexagonGenPredicate();
FunctionPass *createHexagonHardwareLoops();
@@ -211,6 +217,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() {
initializeHexagonConstPropagationPass(PR);
initializeHexagonCopyToCombinePass(PR);
initializeHexagonEarlyIfConversionPass(PR);
+ initializeHexagonGenMemAbsolutePass(PR);
initializeHexagonGenMuxPass(PR);
initializeHexagonHardwareLoopsPass(PR);
initializeHexagonLoopIdiomRecognizeLegacyPassPass(PR);
@@ -413,6 +420,8 @@ void HexagonPassConfig::addPreRegAlloc() {
insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID);
if (!DisableStoreWidening)
addPass(createHexagonStoreWidening());
+ if (EnableGenMemAbs)
+ addPass(createHexagonGenMemAbsolute());
if (!DisableHardwareLoops)
addPass(createHexagonHardwareLoops());
}
diff --git a/llvm/test/CodeGen/Hexagon/load-const-extend-opt.ll b/llvm/test/CodeGen/Hexagon/load-const-extend-opt.ll
new file mode 100644
index 00000000000000..6f9e83c23ab326
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/load-const-extend-opt.ll
@@ -0,0 +1,68 @@
+; RUN: llc -march=hexagon -O3 -hexagon-small-data-threshold=0 < %s | FileCheck %s
+; This test checks that when there are more than 2 uses of a constant address,
+; the value is moved into a register and all instances of the constant are
+; replaced with the register. The GenMemAbsolute pass generates an absolute-set
+; instruction if there are more than 2 uses of this register.
+
+; CHECK: loadi32_3
+; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
+; CHECK-NOT: r{{[0-9]+}} = memw(r{{[0-9]+}}+#0)
+; CHECK:r{{[0-9]+}} = memw(r[[REG:[0-9]+]]=##441652)
+; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
+; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
+; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
+; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
+; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
+
+define void @loadi32_3() #0 {
+entry:
+ %0 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
+ %1 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
+ %2 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
+ ret void
+}
+
+; CHECK: loadi32_2
+; CHECK-NOT: r{{[0-9]+}} = ##441652
+; CHECK: r{{[0-9]+}} = memw(##441652)
+; CHECK: r{{[0-9]+}} = memw(##441652)
+
+define void @loadi32_2() #0 {
+entry:
+ %0 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
+ %1 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
+ ret void
+}
+
+; CHECK: loadi32_abs_global_3
+; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
+; CHECK-NOT: r{{[0-9]+}} = memw(r{{[0-9]+}}+#0)
+; CHECK:r{{[0-9]+}} = memw(r[[REG:[0-9]+]]=##globalInt)
+; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
+; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
+; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
+; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
+; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
+
+@globalInt = external global i32, align 8
+define void @loadi32_abs_global_3() #0 {
+entry:
+ %0 = load volatile i32, ptr @globalInt, align 4
+ %1 = load volatile i32, ptr @globalInt, align 4
+ %2 = load volatile i32, ptr @globalInt, align 4
+ ret void
+}
+
+; CHECK: loadi32_abs_global_2
+; CHECK-NOT:r[[REG:[0-9]+]] = ##globalInt
+; CHECK:r{{[0-9]+}} = memw(##globalInt)
+; CHECK:r{{[0-9]+}} = memw(##globalInt)
+
+define void @loadi32_abs_global_2() #0 {
+entry:
+ %0 = load volatile i32, ptr @globalInt, align 4
+ %1 = load volatile i32, ptr @globalInt, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/Hexagon/store-const-extend-opt.ll b/llvm/test/CodeGen/Hexagon/store-const-extend-opt.ll
new file mode 100644
index 00000000000000..dccf176f3bd07f
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/store-const-extend-opt.ll
@@ -0,0 +1,72 @@
+; RUN: llc -march=hexagon -O3 -hexagon-small-data-threshold=0 < %s | FileCheck %s
+; This test checks that when there are more than 2 uses of a constant address,
+; the value is moved into a register and all instances of the constant are
+; replaced with the register. The GenMemAbsolute pass generates an absolute-set
+; instruction if there are more than 2 uses of this register.
+
+; CHECK: storetrunci32_3
+; CHECK-NOT: memw(##441652) = r{{[0-9]+}}
+; CHECK-NOT: memw(r{{[0-9]+}}+#0) = r{{[0-9]+}}
+; CHECK:memw(r[[REG:[0-9]+]]=##441652) = r{{[0-9]+}}
+; CHECK-NOT: memw(##441652) = r{{[0-9]+}}
+; CHECK:memw(r[[REG]]+#0) = r{{[0-9]+}}
+; CHECK-NOT: memw(##441652) = r{{[0-9]+}}
+; CHECK:memw(r[[REG]]+#0) = r{{[0-9]+}}
+; CHECK-NOT: memw(##441652) = r{{[0-9]+}}
+
+define void @storetrunci32_3(i64 %descr_addr, i32 %rpm_or_sys, i32 %kkr) #0 {
+entry:
+ %conv = trunc i64 %descr_addr to i32
+ store volatile i32 %conv, ptr inttoptr (i32 441652 to ptr), align 4
+ store volatile i32 %rpm_or_sys, ptr inttoptr (i32 441652 to ptr), align 4
+ store volatile i32 %kkr, ptr inttoptr (i32 441652 to ptr), align 4
+ ret void
+}
+
+; CHECK: storetrunci32_2
+; CHECK-NOT: r{{[0-9]+}} = ##441652
+; CHECK: memw(##441652) = r{{[0-9]+}}
+; CHECK: memw(##441652) = r{{[0-9]+}}
+
+define void @storetrunci32_2(i64 %descr_addr, i32 %rpm_or_sys) #0 {
+entry:
+ %conv = trunc i64 %descr_addr to i32
+ store volatile i32 %conv, ptr inttoptr (i32 441652 to ptr), align 4
+ store volatile i32 %rpm_or_sys, ptr inttoptr (i32 441652 to ptr), align 4
+ ret void
+}
+
+; CHECK: storetrunci32_abs_global_3
+; CHECK-NOT: memw(##globalInt) = r{{[0-9]+}}
+; CHECK-NOT: memw(r{{[0-9]+}}+#0) = r{{[0-9]+}}
+; CHECK:memw(r[[REG:[0-9]+]]=##globalInt) = r{{[0-9]+}}
+; CHECK-NOT: memw(##globalInt) = r{{[0-9]+}}
+; CHECK:memw(r[[REG]]+#0) = r{{[0-9]+}}
+; CHECK-NOT: memw(##globalInt) = r{{[0-9]+}}
+; CHECK:memw(r[[REG]]+#0) = r{{[0-9]+}}
+; CHECK-NOT: memw(##globalInt) = r{{[0-9]+}}
+
+@globalInt = external global i32, align 8
+define void @storetrunci32_abs_global_3(i64 %descr_addr, i32 %rpm_or_sys, i32 %kkr) #0 {
+entry:
+ %conv = trunc i64 %descr_addr to i32
+ store volatile i32 %conv, ptr @globalInt, align 4
+ store volatile i32 %rpm_or_sys, ptr @globalInt, align 4
+ store volatile i32 %kkr, ptr @globalInt, align 4
+ ret void
+}
+
+; CHECK: storetrunci32_abs_global_2
+; CHECK-NOT:r[[REG:[0-9]+]] = ##globalInt
+; CHECK:memw(##globalInt) = r{{[0-9]+}}
+; CHECK:memw(##globalInt) = r{{[0-9]+}}
+
+define void @storetrunci32_abs_global_2(i64 %descr_addr, i32 %rpm_or_sys) #0 {
+entry:
+ %conv = trunc i64 %descr_addr to i32
+ store volatile i32 %conv, ptr @globalInt, align 4
+ store volatile i32 %rpm_or_sys, ptr @globalInt, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
More information about the llvm-commits
mailing list