[llvm] [LoongArch] Add machine function pass to merge base + offset (PR #101139)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 7 23:08:17 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: hev (heiher)
Changes:
This commit adds a machine function pass that merges the offset of an address calculation into the global address lowering sequence, modeled on the equivalent RISC-V pass.
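For illustration, a minimal before/after sketch of the rewrite in the small/medium code model, using the vreg notation from the comments in the patch (the symbol `s` and the offset 8 are placeholder values):

```
# Before: base formed by PCALAU12I+ADDI.D, constant offset carried by the load.
pcalau12i vreg1, %pc_hi20(s)
addi.d    vreg2, vreg1, %pc_lo12(s)
ld.w      vreg3, vreg2, 8

# After: the offset is folded into the relocations and the ADDI.D is removed.
pcalau12i vreg1, %pc_hi20(s+8)
ld.w      vreg3, vreg1, %pc_lo12(s+8)
```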
---
Patch is 116.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101139.diff
26 Files Affected:
- (modified) llvm/lib/Target/LoongArch/CMakeLists.txt (+1)
- (modified) llvm/lib/Target/LoongArch/LoongArch.h (+2)
- (modified) llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp (+6)
- (added) llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp (+636)
- (modified) llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp (+3)
- (modified) llvm/test/CodeGen/LoongArch/block-address.ll (+4-6)
- (modified) llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll (+7-14)
- (modified) llvm/test/CodeGen/LoongArch/double-imm.ll (+2-4)
- (modified) llvm/test/CodeGen/LoongArch/float-imm.ll (+2-4)
- (modified) llvm/test/CodeGen/LoongArch/ghc-cc.ll (+17-34)
- (modified) llvm/test/CodeGen/LoongArch/global-address.ll (+6-12)
- (modified) llvm/test/CodeGen/LoongArch/global-variable-code-model.ll (+2-4)
- (modified) llvm/test/CodeGen/LoongArch/inline-asm-constraint-f.ll (+2-4)
- (modified) llvm/test/CodeGen/LoongArch/inline-asm-constraint-m.ll (+4-6)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll (+15-30)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll (+4-8)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll (+6-12)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll (+12-16)
- (modified) llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll (+10-13)
- (modified) llvm/test/CodeGen/LoongArch/merge-base-offset.ll (+163-272)
- (modified) llvm/test/CodeGen/LoongArch/opt-pipeline.ll (+1)
- (modified) llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll (+4-8)
- (modified) llvm/test/CodeGen/LoongArch/vector-fp-imm.ll (+42-84)
- (modified) llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected (+1-2)
- (modified) llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected (+1-2)
- (modified) llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn (+1)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/CMakeLists.txt b/llvm/lib/Target/LoongArch/CMakeLists.txt
index cadc335a621f2..0f674b1b0fa9e 100644
--- a/llvm/lib/Target/LoongArch/CMakeLists.txt
+++ b/llvm/lib/Target/LoongArch/CMakeLists.txt
@@ -24,6 +24,7 @@ add_llvm_target(LoongArchCodeGen
LoongArchISelDAGToDAG.cpp
LoongArchISelLowering.cpp
LoongArchMCInstLower.cpp
+ LoongArchMergeBaseOffset.cpp
LoongArchOptWInstrs.cpp
LoongArchRegisterInfo.cpp
LoongArchSubtarget.cpp
diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h
index adfb844ee31b6..db60523738880 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.h
+++ b/llvm/lib/Target/LoongArch/LoongArch.h
@@ -36,12 +36,14 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
FunctionPass *createLoongArchDeadRegisterDefinitionsPass();
FunctionPass *createLoongArchExpandAtomicPseudoPass();
FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM);
+FunctionPass *createLoongArchMergeBaseOffsetOptPass();
FunctionPass *createLoongArchOptWInstrsPass();
FunctionPass *createLoongArchPreRAExpandPseudoPass();
FunctionPass *createLoongArchExpandPseudoPass();
void initializeLoongArchDAGToDAGISelLegacyPass(PassRegistry &);
void initializeLoongArchDeadRegisterDefinitionsPass(PassRegistry &);
void initializeLoongArchExpandAtomicPseudoPass(PassRegistry &);
+void initializeLoongArchMergeBaseOffsetOptPass(PassRegistry &);
void initializeLoongArchOptWInstrsPass(PassRegistry &);
void initializeLoongArchPreRAExpandPseudoPass(PassRegistry &);
void initializeLoongArchExpandPseudoPass(PassRegistry &);
diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
index f478870217ec6..8bb9497a847fa 100644
--- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
@@ -130,10 +130,16 @@ bool LoongArchAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
OS << "$" << LoongArchInstPrinter::getRegisterName(BaseMO.getReg());
// Print the offset operand.
const MachineOperand &OffsetMO = MI->getOperand(OpNo + 1);
+ MCOperand MCO;
+ if (!lowerOperand(OffsetMO, MCO))
+ return true;
if (OffsetMO.isReg())
OS << ", $" << LoongArchInstPrinter::getRegisterName(OffsetMO.getReg());
else if (OffsetMO.isImm())
OS << ", " << OffsetMO.getImm();
+ else if (OffsetMO.isGlobal() || OffsetMO.isBlockAddress() ||
+ OffsetMO.isMCSymbol())
+ OS << ", " << *MCO.getExpr();
else
return true;
diff --git a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
new file mode 100644
index 0000000000000..ae50b7a6f923e
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
@@ -0,0 +1,636 @@
+//===---- LoongArchMergeBaseOffset.cpp - Optimise address calculations ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Merge the offset of address calculation into the offset field
+// of instructions in a global address lowering sequence.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArch.h"
+#include "LoongArchTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
+#include <optional>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarch-merge-base-offset"
+#define LoongArch_MERGE_BASE_OFFSET_NAME "LoongArch Merge Base Offset"
+
+namespace {
+
+class LoongArchMergeBaseOffsetOpt : public MachineFunctionPass {
+ const LoongArchSubtarget *ST = nullptr;
+ MachineRegisterInfo *MRI;
+
+public:
+ static char ID;
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+ bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Lo12,
+ MachineInstr *&Lo20, MachineInstr *&Hi12,
+ MachineInstr *&Last);
+
+ bool detectAndFoldOffset(MachineInstr &Hi20, MachineInstr &Lo12,
+ MachineInstr *&Lo20, MachineInstr *&Hi12,
+ MachineInstr *&Last);
+ void foldOffset(MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
+ MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
+ int64_t Offset);
+ bool foldLargeOffset(MachineInstr &Hi20, MachineInstr &Lo12,
+ MachineInstr *&Lo20, MachineInstr *&Hi12,
+ MachineInstr *&Last, MachineInstr &TailAdd,
+ Register GAReg);
+
+ bool foldIntoMemoryOps(MachineInstr &Hi20, MachineInstr &Lo12,
+ MachineInstr *&Lo20, MachineInstr *&Hi12,
+ MachineInstr *&Last);
+
+ LoongArchMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override {
+ return LoongArch_MERGE_BASE_OFFSET_NAME;
+ }
+};
+} // end anonymous namespace
+
+char LoongArchMergeBaseOffsetOpt::ID = 0;
+INITIALIZE_PASS(LoongArchMergeBaseOffsetOpt, DEBUG_TYPE,
+ LoongArch_MERGE_BASE_OFFSET_NAME, false, false)
+
+// Detect either of the patterns:
+//
+// 1. (small/medium):
+// pcalau12i vreg1, %pc_hi20(s)
+// addi.d vreg2, vreg1, %pc_lo12(s)
+//
+// 2. (large):
+// pcalau12i vreg1, %pc_hi20(s)
+// addi.d vreg2, $zero, %pc_lo12(s)
+// lu32i.d vreg3, vreg2, %pc64_lo20(s)
+// lu52i.d vreg4, vreg3, %pc64_hi12(s)
+// add.d vreg5, vreg4, vreg1
+
+// The pattern is only accepted if:
+// 1) For small and medium pattern, the first instruction has only one use,
+// which is the ADDI.
+// 2) For large pattern, the first four instructions each have only one use,
+// and the user of the fourth instruction is ADD.
+// 3) The address operands have the appropriate type, reflecting the
+// lowering of a global address or constant pool using the pattern.
+// 4) The offset value in the Global Address or Constant Pool is 0.
+bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
+ MachineInstr *&Lo12,
+ MachineInstr *&Lo20,
+ MachineInstr *&Hi12,
+ MachineInstr *&Last) {
+ if (Hi20.getOpcode() != LoongArch::PCALAU12I)
+ return false;
+
+ const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
+ if (Hi20Op1.getTargetFlags() != LoongArchII::MO_PCREL_HI)
+ return false;
+
+ auto isGlobalOrCPIOrBlockAddress = [](const MachineOperand &Op) {
+ return Op.isGlobal() || Op.isCPI() || Op.isBlockAddress();
+ };
+
+ if (!isGlobalOrCPIOrBlockAddress(Hi20Op1) || Hi20Op1.getOffset() != 0)
+ return false;
+
+ Register HiDestReg = Hi20.getOperand(0).getReg();
+ if (!MRI->hasOneUse(HiDestReg))
+ return false;
+
+ MachineInstr *UseInst = &*MRI->use_instr_begin(HiDestReg);
+ if (UseInst->getOpcode() != LoongArch::ADD_D) {
+ Lo12 = UseInst;
+ if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
+ (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
+ return false;
+ } else {
+ assert(ST->is64Bit());
+ Last = UseInst;
+
+ Register LastOp1Reg = Last->getOperand(1).getReg();
+ if (!LastOp1Reg.isVirtual())
+ return false;
+ Hi12 = MRI->getVRegDef(LastOp1Reg);
+ const MachineOperand &Hi12Op2 = Hi12->getOperand(2);
+ if (Hi12Op2.getTargetFlags() != LoongArchII::MO_PCREL64_HI)
+ return false;
+ if (!isGlobalOrCPIOrBlockAddress(Hi12Op2) || Hi12Op2.getOffset() != 0)
+ return false;
+ if (!MRI->hasOneUse(Hi12->getOperand(0).getReg()))
+ return false;
+
+ Lo20 = MRI->getVRegDef(Hi12->getOperand(1).getReg());
+ const MachineOperand &Lo20Op2 = Lo20->getOperand(2);
+ if (Lo20Op2.getTargetFlags() != LoongArchII::MO_PCREL64_LO)
+ return false;
+ if (!isGlobalOrCPIOrBlockAddress(Lo20Op2) || Lo20Op2.getOffset() != 0)
+ return false;
+ if (!MRI->hasOneUse(Lo20->getOperand(0).getReg()))
+ return false;
+
+ Lo12 = MRI->getVRegDef(Lo20->getOperand(1).getReg());
+ if (!MRI->hasOneUse(Lo12->getOperand(0).getReg()))
+ return false;
+ }
+
+ const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
+ assert(Hi20.getOpcode() == LoongArch::PCALAU12I);
+ if (Lo12Op2.getTargetFlags() != LoongArchII::MO_PCREL_LO ||
+ !(isGlobalOrCPIOrBlockAddress(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
+ Lo12Op2.getOffset() != 0)
+ return false;
+
+ if (Hi20Op1.isGlobal()) {
+ LLVM_DEBUG(dbgs() << " Found lowered global address: "
+ << *Hi20Op1.getGlobal() << "\n");
+ } else if (Hi20Op1.isBlockAddress()) {
+ LLVM_DEBUG(dbgs() << " Found lowered basic address: "
+ << *Hi20Op1.getBlockAddress() << "\n");
+ } else if (Hi20Op1.isCPI()) {
+ LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << Hi20Op1.getIndex()
+ << "\n");
+ }
+
+ return true;
+}
+
+// Update the offset in Hi20, Lo12, Lo20 and Hi12 instructions.
+// Delete the tail instruction and update all the uses to use the
+// output from Last.
+void LoongArchMergeBaseOffsetOpt::foldOffset(
+ MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
+ MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
+ int64_t Offset) {
+ assert(isInt<32>(Offset) && "Unexpected offset");
+ // Put the offset back in Hi and the Lo
+ Hi20.getOperand(1).setOffset(Offset);
+ Lo12.getOperand(2).setOffset(Offset);
+ if (Lo20 && Hi12) {
+ Lo20->getOperand(2).setOffset(Offset);
+ Hi12->getOperand(2).setOffset(Offset);
+ }
+ // Delete the tail instruction.
+ MachineInstr *Def = Last ? Last : &Lo12;
+ MRI->constrainRegClass(Def->getOperand(0).getReg(),
+ MRI->getRegClass(Tail.getOperand(0).getReg()));
+ MRI->replaceRegWith(Tail.getOperand(0).getReg(), Def->getOperand(0).getReg());
+ Tail.eraseFromParent();
+ LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
+ << " " << Hi20 << " " << Lo12;);
+ if (Lo20 && Hi12) {
+ LLVM_DEBUG(dbgs() << " " << *Lo20 << " " << *Hi12;);
+ }
+}
+
+// Detect patterns for large offsets that are passed into an ADD instruction.
+// If the pattern is found, updates the offset in Hi20, Lo12, Lo20 and Hi12
+// instructions and deletes TailAdd and the instructions that produced the
+// offset.
+//
+// Base address lowering is of the form:
+// Hi20: pcalau12i vreg1, %pc_hi20(s)
+// Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
+// / \
+// / \
+// / \
+// / The large offset can be of two forms: \
+// 1) Offset that has non zero bits in lower 2) Offset that has non zero
+// 12 bits and upper 20 bits bits in upper 20 bits only
+// OffsetHi: lu12i.w vreg3, 4
+// OffsetLo: ori voff, vreg3, 188 OffsetHi: lu12i.w voff, 128
+// \ /
+// \ /
+// \ /
+// \ /
+// TailAdd: add.d vreg4, vreg2, voff
+bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
+ MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
+ MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
+ Register GAReg) {
+ assert((TailAdd.getOpcode() == LoongArch::ADD_W ||
+ TailAdd.getOpcode() == LoongArch::ADD_D) &&
+ "Expected ADD instruction!");
+ Register Rs = TailAdd.getOperand(1).getReg();
+ Register Rt = TailAdd.getOperand(2).getReg();
+ Register Reg = Rs == GAReg ? Rt : Rs;
+
+ // Can't fold if the register has more than one use.
+ if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
+ return false;
+ // This can point to an ORI or a LU12I.W:
+ MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
+ if (OffsetTail.getOpcode() == LoongArch::ORI) {
+ // The offset value has non zero bits in both %hi and %lo parts.
+ // Detect an ORI that feeds from a LU12I.W instruction.
+ MachineOperand &OriImmOp = OffsetTail.getOperand(2);
+ if (OriImmOp.getTargetFlags() != LoongArchII::MO_None)
+ return false;
+ Register OriReg = OffsetTail.getOperand(1).getReg();
+ int64_t OffLo = OriImmOp.getImm();
+
+ // Handle rs1 of ORI is R0.
+ if (OriReg == LoongArch::R0) {
+ LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail);
+ foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, OffLo);
+ OffsetTail.eraseFromParent();
+ return true;
+ }
+
+ MachineInstr &OffsetLu12i = *MRI->getVRegDef(OriReg);
+ MachineOperand &Lu12iImmOp = OffsetLu12i.getOperand(1);
+ if (OffsetLu12i.getOpcode() != LoongArch::LU12I_W ||
+ Lu12iImmOp.getTargetFlags() != LoongArchII::MO_None ||
+ !MRI->hasOneUse(OffsetLu12i.getOperand(0).getReg()))
+ return false;
+ int64_t Offset = SignExtend64<32>(Lu12iImmOp.getImm() << 12);
+ Offset += OffLo;
+ // LU12I.W+ORI sign extends the result.
+ Offset = SignExtend64<32>(Offset);
+ LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
+ << " " << OffsetLu12i);
+ foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
+ OffsetTail.eraseFromParent();
+ OffsetLu12i.eraseFromParent();
+ return true;
+ } else if (OffsetTail.getOpcode() == LoongArch::LU12I_W) {
+ // The offset value has all zero bits in the lower 12 bits. Only LU12I.W
+ // exists.
+ LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
+ int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
+ foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
+ OffsetTail.eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
+bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
+ MachineInstr &Lo12,
+ MachineInstr *&Lo20,
+ MachineInstr *&Hi12,
+ MachineInstr *&Last) {
+ Register DestReg =
+ Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
+
+ // Look for arithmetic instructions we can get an offset from.
+ // We might be able to remove the arithmetic instructions by folding the
+ // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I).
+ if (!MRI->hasOneUse(DestReg))
+ return false;
+
+ // DestReg has only one use.
+ MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
+ switch (Tail.getOpcode()) {
+ default:
+ LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
+ << Tail);
+ break;
+ case LoongArch::ADDI_W:
+ if (ST->is64Bit())
+ return false;
+ [[fallthrough]];
+ case LoongArch::ADDI_D:
+ case LoongArch::ADDU16I_D: {
+ // Offset is simply an immediate operand.
+ int64_t Offset = Tail.getOperand(2).getImm();
+ if (Tail.getOpcode() == LoongArch::ADDU16I_D)
+ Offset = SignExtend64<32>(Offset << 16);
+
+ // We might have two ADDIs in a row.
+ Register TailDestReg = Tail.getOperand(0).getReg();
+ if (MRI->hasOneUse(TailDestReg)) {
+ MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
+ if (ST->is64Bit() && TailTail.getOpcode() == LoongArch::ADDI_W)
+ return false;
+ if (TailTail.getOpcode() == LoongArch::ADDI_W ||
+ TailTail.getOpcode() == LoongArch::ADDI_D) {
+ Offset += TailTail.getOperand(2).getImm();
+ LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
+ foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailTail, Offset);
+ Tail.eraseFromParent();
+ return true;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
+ foldOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, Offset);
+ return true;
+ }
+ case LoongArch::ADD_W:
+ if (ST->is64Bit())
+ return false;
+ [[fallthrough]];
+ case LoongArch::ADD_D:
+ // The offset is too large to fit in the immediate field of ADDI.
+ // This can be in two forms:
+ // 1) LU12I.W hi_offset followed by:
+ // ORI lo_offset
+ // This happens in case the offset has non zero bits in
+ // both hi 20 and lo 12 bits.
+ // 2) LU12I.W (offset20)
+ // This happens in case the lower 12 bits of the offset are zeros.
+ return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
+ break;
+ }
+
+ return false;
+}
+
+// Memory access opcode mapping for transforms.
+static unsigned getNewOpc(unsigned Op, bool isLarge) {
+ switch (Op) {
+ case LoongArch::LD_B:
+ return isLarge ? LoongArch::LDX_B : LoongArch::LD_B;
+ case LoongArch::LD_H:
+ return isLarge ? LoongArch::LDX_H : LoongArch::LD_H;
+ case LoongArch::LD_W:
+ case LoongArch::LDPTR_W:
+ return isLarge ? LoongArch::LDX_W : LoongArch::LD_W;
+ case LoongArch::LD_D:
+ case LoongArch::LDPTR_D:
+ return isLarge ? LoongArch::LDX_D : LoongArch::LD_D;
+ case LoongArch::LD_BU:
+ return isLarge ? LoongArch::LDX_BU : LoongArch::LD_BU;
+ case LoongArch::LD_HU:
+ return isLarge ? LoongArch::LDX_HU : LoongArch::LD_HU;
+ case LoongArch::LD_WU:
+ return isLarge ? LoongArch::LDX_WU : LoongArch::LD_WU;
+ case LoongArch::FLD_S:
+ return isLarge ? LoongArch::FLDX_S : LoongArch::FLD_S;
+ case LoongArch::FLD_D:
+ return isLarge ? LoongArch::FLDX_D : LoongArch::FLD_D;
+ case LoongArch::ST_B:
+ return isLarge ? LoongArch::STX_B : LoongArch::ST_B;
+ case LoongArch::ST_H:
+ return isLarge ? LoongArch::STX_H : LoongArch::ST_H;
+ case LoongArch::ST_W:
+ case LoongArch::STPTR_W:
+ return isLarge ? LoongArch::STX_W : LoongArch::ST_W;
+ case LoongArch::ST_D:
+ case LoongArch::STPTR_D:
+ return isLarge ? LoongArch::STX_D : LoongArch::ST_D;
+ case LoongArch::FST_S:
+ return isLarge ? LoongArch::FSTX_S : LoongArch::FST_S;
+ case LoongArch::FST_D:
+ return isLarge ? LoongArch::FSTX_D : LoongArch::FST_D;
+ default:
+ llvm_unreachable("Unexpected opcode for replacement");
+ }
+}
+
+bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
+ MachineInstr &Lo12,
+ MachineInstr *&Lo20,
+ MachineInstr *&Hi12,
+ MachineInstr *&Last) {
+ Register DestReg =
+ Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
+
+ // If all the uses are memory ops with the same offset, we can transform:
+ //
+ // 1. (small/medium):
+ // pcalau12i vreg1, %pc_hi20(s)
+ // addi.d vreg2, vreg1, %pc_lo12(s)
+ // ld.w vreg3, 8(vreg2)
+ //
+ // =>
+ //
+ // pcalau12i vreg1, %pc_hi20(s+8)
+ // ld.w vreg3, vreg1, %pc_lo12(s+8)(vreg1)
+ //
+ // 2. (large):
+ // pcalau12i vreg1, %pc_hi20(s)
+ // addi.d vreg2, $zero, %pc_lo12(s)
+ // lu32i.d vreg3, vreg2, %pc64_lo20(s)
+ // lu52i.d vreg4, vreg3, %pc64_hi12(s)
+ // add.d vreg5, vreg4, vreg1
+ // ld.w vreg6, 8(vreg5)
+ //
+ // =>
+ //
+ // pcalau12i vreg1, %pc_hi20(s+8)
+ // addi.d vreg2, $zero, %pc_lo12(s+8)
+ // lu32i.d vreg3, vreg2, %pc64_lo20(s+8)
+ // lu52i.d vreg4, vreg3, %pc64_hi12(s+8)
+ // ldx.w vreg6, vreg4, vreg1
+
+ std::opti...
[truncated]
``````````
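As a worked example of the foldLargeOffset arithmetic above (a hedged sketch reusing the lu12i.w/ori immediates 4 and 188 from the comment in the patch): the folded offset is SignExtend64<32>((4 << 12) + 188) = 16572, so the offset-producing instructions and the trailing add.d are deleted and the relocations absorb the constant:

```
# Before: large offset materialized with LU12I.W+ORI, then added to the base.
pcalau12i vreg1, %pc_hi20(s)
addi.d    vreg2, vreg1, %pc_lo12(s)
lu12i.w   vreg3, 4
ori       voff,  vreg3, 188
add.d     vreg4, vreg2, voff

# After: (4 << 12) + 188 = 16572 folded into the relocations.
pcalau12i vreg1, %pc_hi20(s+16572)
addi.d    vreg2, vreg1, %pc_lo12(s+16572)
```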
https://github.com/llvm/llvm-project/pull/101139
More information about the llvm-commits mailing list