[llvm] [RISCV] Add a rematerializable pseudo instruction for LUI+ADDI for global addresses. (PR #93142)

via llvm-commits llvm-commits at lists.llvm.org
Wed May 22 23:35:39 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

<details>
<summary>Changes</summary>

This allows register allocation to rematerialize these instead of spilling and reloading. We need to make it a single instruction due to limitations in rematerialization.
    
This pseudo is expanded to an LUI+ADDI pair between regalloc and post RA scheduling.

Co-authored-by: Jesse Huang <jesse.huang@<!-- -->sifive.com>

Stacked on #<!-- -->93129

---

Patch is 647.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93142.diff


39 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (+71-4) 
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfo.td (+16) 
- (modified) llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp (+27-8) 
- (modified) llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp (+23) 
- (modified) llvm/test/CodeGen/RISCV/bfloat-mem.ll (+5-5) 
- (modified) llvm/test/CodeGen/RISCV/byval.ll (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll (+1186-1202) 
- (modified) llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll (+724-734) 
- (modified) llvm/test/CodeGen/RISCV/callee-saved-gprs.ll (+1432-1470) 
- (modified) llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll (+11-11) 
- (modified) llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/double-mem.ll (+19-19) 
- (modified) llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll (+7-7) 
- (modified) llvm/test/CodeGen/RISCV/float-mem.ll (+10-10) 
- (modified) llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll (+12-12) 
- (modified) llvm/test/CodeGen/RISCV/global-merge-offset.ll (+7-7) 
- (modified) llvm/test/CodeGen/RISCV/global-merge.ll (+1-1) 
- (modified) llvm/test/CodeGen/RISCV/half-mem.ll (+20-20) 
- (modified) llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/mem.ll (+5-5) 
- (modified) llvm/test/CodeGen/RISCV/mem64.ll (+5-5) 
- (modified) llvm/test/CodeGen/RISCV/memcpy.ll (+60-60) 
- (modified) llvm/test/CodeGen/RISCV/push-pop-popret.ll (+1428-1466) 
- (modified) llvm/test/CodeGen/RISCV/rv32xtheadbb.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/rv32zbb.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll (+5-5) 
- (modified) llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll (+20-20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll (+133-142) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll (+10-10) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+8-8) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll (+40-40) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll (+5-5) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll (+6-6) 
- (modified) llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll (+25-25) 
- (modified) llvm/test/CodeGen/RISCV/saverestore.ll (+2249-128) 
- (modified) llvm/test/CodeGen/RISCV/tail-calls.ll (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/unroll-loop-cse.ll (+13-19) 
- (modified) llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll (+4-4) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index d965dd4fc9a95..7c0908f18e28b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2457,6 +2457,61 @@ static bool isWorthFoldingAdd(SDValue Add) {
   return true;
 }
 
+// To prevent SelectAddrRegImm from folding offsets that conflicts with the
+// fusion of PseudoLIAddr, check if the offset of every use of a given address
+// is within the alignment
+static bool areUserOffsetsWithinAlignment(SDValue Addr, Align Alignment) {
+  for (auto *Use : Addr->uses()) {
+    if (!Use->isMachineOpcode()) {
+      // Don't allow stores of the value. It must be used as the address.
+      if (Use->getOpcode() == ISD::STORE &&
+          cast<StoreSDNode>(Use)->getValue() == Addr)
+        return false;
+      if (Use->getOpcode() == ISD::ATOMIC_STORE &&
+          cast<AtomicSDNode>(Use)->getVal() == Addr)
+        return false;
+      // If the user is direct load/store, there is no offset.
+      if (Use->getOpcode() == ISD::LOAD || Use->getOpcode() == ISD::STORE ||
+          Use->getOpcode() == ISD::ATOMIC_LOAD ||
+          Use->getOpcode() == ISD::ATOMIC_STORE)
+        continue;
+      if (Use->getOpcode() == ISD::ADD &&
+          isa<ConstantSDNode>(Use->getOperand(1)) &&
+          Alignment > cast<ConstantSDNode>(Use->getOperand(1))->getSExtValue())
+        continue;
+
+      return false;
+    }
+
+    // If user is already selected, get offsets from load/store instructions
+    unsigned int Opcode = Use->getMachineOpcode();
+    if (Opcode == RISCV::LB || Opcode == RISCV::LBU || Opcode == RISCV::LH ||
+        Opcode == RISCV::LHU || Opcode == RISCV::LW || Opcode == RISCV::LWU ||
+        Opcode == RISCV::LD || Opcode == RISCV::FLH || Opcode == RISCV::FLW ||
+        Opcode == RISCV::FLD) {
+      if (auto *Offset = dyn_cast<ConstantSDNode>(Use->getOperand(1))) {
+        if (Offset->isZero() || Alignment > Offset->getSExtValue())
+          continue;
+      }
+      return false;
+    }
+    if (Opcode == RISCV::SB || Opcode == RISCV::SH || Opcode == RISCV::SW ||
+        Opcode == RISCV::SD || Opcode == RISCV::FSH || Opcode == RISCV::FSW ||
+        Opcode == RISCV::FSD) {
+      // Also check if Addr is used as the value of store.
+      if (Use->getOperand(0) == Addr)
+        return false;
+      if (auto *Offset = dyn_cast<ConstantSDNode>(Use->getOperand(2))) {
+        if (Offset->isZero() || Alignment > Offset->getSExtValue())
+          continue;
+      }
+      return false;
+    }
+    return false;
+  }
+
+  return true;
+}
 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
                                               unsigned MaxShiftAmount,
                                               SDValue &Base, SDValue &Index,
@@ -2520,9 +2575,21 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
   MVT VT = Addr.getSimpleValueType();
 
   if (Addr.getOpcode() == RISCVISD::ADD_LO) {
-    Base = Addr.getOperand(0);
-    Offset = Addr.getOperand(1);
-    return true;
+    bool CanFold = true;
+    // Unconditionally fold if operand 1 is not a global address (e.g.
+    // externsymbol)
+    if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
+      const DataLayout &DL = CurDAG->getDataLayout();
+      Align Alignment = commonAlignment(
+          GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
+      if (!areUserOffsetsWithinAlignment(Addr, Alignment))
+        CanFold = false;
+    }
+    if (CanFold) {
+      Base = Addr.getOperand(0);
+      Offset = Addr.getOperand(1);
+      return true;
+    }
   }
 
   int64_t RV32ZdinxRange = IsINX ? 4 : 0;
@@ -2541,7 +2608,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
           const DataLayout &DL = CurDAG->getDataLayout();
           Align Alignment = commonAlignment(
               GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
-          if (CVal == 0 || Alignment > CVal) {
+          if (areUserOffsetsWithinAlignment(Base, Alignment)) {
             int64_t CombinedOffset = CVal + GA->getOffset();
             Base = Base.getOperand(0);
             Offset = CurDAG->getTargetGlobalAddress(
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 9d574edb4e6d1..8903ddc1903af 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1311,6 +1311,22 @@ def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),
 
 /// HI and ADD_LO address nodes.
 
+let Size = 8, isReMaterializable = 1 in
+def PseudoLIaddr : Pseudo<(outs GPR:$dst), (ins uimm20_lui:$hi, simm12:$lo), []>,
+                     Sched<[WriteIALU]>;
+
+def LUIADDI : PatFrag<(ops node:$hi, node:$lo),
+                      (riscv_add_lo (riscv_hi node:$hi), node:$lo)>;
+
+def : Pat<(LUIADDI tglobaladdr:$hi, tglobaladdr:$lo),
+          (PseudoLIaddr tglobaladdr:$hi, tglobaladdr:$lo)>;
+def : Pat<(LUIADDI tblockaddress:$hi, tblockaddress:$lo),
+          (PseudoLIaddr tblockaddress:$hi, tblockaddress:$lo)>;
+def : Pat<(LUIADDI tjumptable:$hi, tjumptable:$lo),
+          (PseudoLIaddr tjumptable:$hi, tjumptable:$lo)>;
+def : Pat<(LUIADDI tconstpool:$hi, tconstpool:$lo),
+          (PseudoLIaddr tconstpool:$hi, tconstpool:$lo)>;
+
 def : Pat<(riscv_hi tglobaladdr:$in), (LUI tglobaladdr:$in)>;
 def : Pat<(riscv_hi tblockaddress:$in), (LUI tblockaddress:$in)>;
 def : Pat<(riscv_hi tjumptable:$in), (LUI tjumptable:$in)>;
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 410989177a8b9..1b8ad38682b55 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -84,7 +84,8 @@ INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
 //    3) The offset value in the Global Address or Constant Pool is 0.
 bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
                                              MachineInstr *&Lo) {
-  if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC)
+  if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC &&
+      Hi.getOpcode() != RISCV::PseudoLIaddr)
     return false;
 
   const MachineOperand &HiOp1 = Hi.getOperand(1);
@@ -97,16 +98,22 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
       HiOp1.getOffset() != 0)
     return false;
 
-  Register HiDestReg = Hi.getOperand(0).getReg();
-  if (!MRI->hasOneUse(HiDestReg))
-    return false;
+  if (Hi.getOpcode() == RISCV::PseudoLIaddr) {
+    // Most of the code should handle it correctly without modification by
+    // setting Lo and Hi both point to PseudoLIaddr
+    Lo = &Hi;
+  } else {
+    Register HiDestReg = Hi.getOperand(0).getReg();
+    if (!MRI->hasOneUse(HiDestReg))
+      return false;
 
-  Lo = &*MRI->use_instr_begin(HiDestReg);
-  if (Lo->getOpcode() != RISCV::ADDI)
-    return false;
+    Lo = &*MRI->use_instr_begin(HiDestReg);
+    if (Lo->getOpcode() != RISCV::ADDI)
+      return false;
+  }
 
   const MachineOperand &LoOp2 = Lo->getOperand(2);
-  if (Hi.getOpcode() == RISCV::LUI) {
+  if (Hi.getOpcode() == RISCV::LUI || Hi.getOpcode() == RISCV::PseudoLIaddr) {
     if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
         !(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
         LoOp2.getOffset() != 0)
@@ -466,6 +473,13 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
 
   Hi.getOperand(1).setOffset(NewOffset);
   MachineOperand &ImmOp = Lo.getOperand(2);
+  // Expand PseudoLIaddr into LUI
+  if (Hi.getOpcode() == RISCV::PseudoLIaddr) {
+    auto *TII = ST->getInstrInfo();
+    Hi.setDesc(TII->get(RISCV::LUI));
+    Hi.removeOperand(2);
+  }
+
   if (Hi.getOpcode() != RISCV::AUIPC)
     ImmOp.setOffset(NewOffset);
 
@@ -501,6 +515,11 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
     }
   }
 
+  // Prevent Lo (originally PseudoLIaddr, which is also pointed by Hi) from
+  // being erased
+  if (&Lo == &Hi)
+    return true;
+
   MRI->replaceRegWith(Lo.getOperand(0).getReg(), Hi.getOperand(0).getReg());
   Lo.eraseFromParent();
   return true;
diff --git a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
index 52f2ce27164d6..ce82fbea10063 100644
--- a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
@@ -44,6 +44,7 @@ class RISCVPostRAExpandPseudo : public MachineFunctionPass {
   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                 MachineBasicBlock::iterator &NextMBBI);
   bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
+  bool expandLIaddr(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
 };
 
 char RISCVPostRAExpandPseudo::ID = 0;
@@ -75,6 +76,8 @@ bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
   switch (MBBI->getOpcode()) {
   case RISCV::PseudoMovImm:
     return expandMovImm(MBB, MBBI);
+  case RISCV::PseudoLIaddr:
+    return expandLIaddr(MBB, MBBI);
   default:
     return false;
   }
@@ -101,6 +104,26 @@ bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB,
   return true;
 }
 
+bool RISCVPostRAExpandPseudo::expandLIaddr(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MBBI) {
+  DebugLoc DL = MBBI->getDebugLoc();
+
+  Register DstReg = MBBI->getOperand(0).getReg();
+  bool DstIsDead = MBBI->getOperand(0).isDead();
+  bool Renamable = MBBI->getOperand(0).isRenamable();
+
+  BuildMI(MBB, MBBI, DL, TII->get(RISCV::LUI))
+      .addReg(DstReg, RegState::Define | getRenamableRegState(Renamable))
+      .add(MBBI->getOperand(1));
+  BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI))
+      .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead) |
+                          getRenamableRegState(Renamable))
+      .addReg(DstReg, RegState::Kill | getRenamableRegState(Renamable))
+      .add(MBBI->getOperand(2));
+  MBBI->eraseFromParent();
+  return true;
+}
+
 } // end of anonymous namespace
 
 INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32",
diff --git a/llvm/test/CodeGen/RISCV/bfloat-mem.ll b/llvm/test/CodeGen/RISCV/bfloat-mem.ll
index 4b6c0c29d660b..39340c85cfadc 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-mem.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-mem.ll
@@ -53,11 +53,11 @@ define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind {
 ; CHECK-NEXT:    fadd.s fa5, fa4, fa5
 ; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
 ; CHECK-NEXT:    lui a0, %hi(G)
-; CHECK-NEXT:    flh fa5, %lo(G)(a0)
-; CHECK-NEXT:    addi a1, a0, %lo(G)
-; CHECK-NEXT:    fsh fa0, %lo(G)(a0)
-; CHECK-NEXT:    flh fa5, 18(a1)
-; CHECK-NEXT:    fsh fa0, 18(a1)
+; CHECK-NEXT:    addi a0, a0, %lo(G)
+; CHECK-NEXT:    flh fa5, 0(a0)
+; CHECK-NEXT:    fsh fa0, 0(a0)
+; CHECK-NEXT:    flh fa5, 18(a0)
+; CHECK-NEXT:    fsh fa0, 18(a0)
 ; CHECK-NEXT:    ret
   %1 = fadd bfloat %a, %b
   %2 = load volatile bfloat, ptr @G
diff --git a/llvm/test/CodeGen/RISCV/byval.ll b/llvm/test/CodeGen/RISCV/byval.ll
index 9151f3b03e7c2..c5e48ee75e482 100644
--- a/llvm/test/CodeGen/RISCV/byval.ll
+++ b/llvm/test/CodeGen/RISCV/byval.ll
@@ -22,15 +22,15 @@ define void @caller() nounwind {
 ; RV32I-NEXT:    addi sp, sp, -32
 ; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lui a0, %hi(foo)
-; RV32I-NEXT:    lw a1, %lo(foo)(a0)
-; RV32I-NEXT:    sw a1, 12(sp)
 ; RV32I-NEXT:    addi a0, a0, %lo(foo)
 ; RV32I-NEXT:    lw a1, 12(a0)
 ; RV32I-NEXT:    sw a1, 24(sp)
 ; RV32I-NEXT:    lw a1, 8(a0)
 ; RV32I-NEXT:    sw a1, 20(sp)
-; RV32I-NEXT:    lw a0, 4(a0)
-; RV32I-NEXT:    sw a0, 16(sp)
+; RV32I-NEXT:    lw a1, 4(a0)
+; RV32I-NEXT:    sw a1, 16(sp)
+; RV32I-NEXT:    lw a0, 0(a0)
+; RV32I-NEXT:    sw a0, 12(sp)
 ; RV32I-NEXT:    addi a0, sp, 12
 ; RV32I-NEXT:    call callee
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
index 2122b3fd91788..036daf587eda0 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
@@ -28,281 +28,281 @@ define void @callee() nounwind {
 ; ILP32-LABEL: callee:
 ; ILP32:       # %bb.0:
 ; ILP32-NEXT:    lui a0, %hi(var)
-; ILP32-NEXT:    flw fa5, %lo(var)(a0)
-; ILP32-NEXT:    flw fa4, %lo(var+4)(a0)
-; ILP32-NEXT:    flw fa3, %lo(var+8)(a0)
-; ILP32-NEXT:    flw fa2, %lo(var+12)(a0)
-; ILP32-NEXT:    addi a1, a0, %lo(var)
-; ILP32-NEXT:    flw fa1, 16(a1)
-; ILP32-NEXT:    flw fa0, 20(a1)
-; ILP32-NEXT:    flw ft0, 24(a1)
-; ILP32-NEXT:    flw ft1, 28(a1)
-; ILP32-NEXT:    flw ft2, 32(a1)
-; ILP32-NEXT:    flw ft3, 36(a1)
-; ILP32-NEXT:    flw ft4, 40(a1)
-; ILP32-NEXT:    flw ft5, 44(a1)
-; ILP32-NEXT:    flw ft6, 48(a1)
-; ILP32-NEXT:    flw ft7, 52(a1)
-; ILP32-NEXT:    flw fa6, 56(a1)
-; ILP32-NEXT:    flw fa7, 60(a1)
-; ILP32-NEXT:    flw ft8, 64(a1)
-; ILP32-NEXT:    flw ft9, 68(a1)
-; ILP32-NEXT:    flw ft10, 72(a1)
-; ILP32-NEXT:    flw ft11, 76(a1)
-; ILP32-NEXT:    flw fs0, 80(a1)
-; ILP32-NEXT:    flw fs1, 84(a1)
-; ILP32-NEXT:    flw fs2, 88(a1)
-; ILP32-NEXT:    flw fs3, 92(a1)
-; ILP32-NEXT:    flw fs4, 96(a1)
-; ILP32-NEXT:    flw fs5, 100(a1)
-; ILP32-NEXT:    flw fs6, 104(a1)
-; ILP32-NEXT:    flw fs7, 108(a1)
-; ILP32-NEXT:    flw fs8, 124(a1)
-; ILP32-NEXT:    flw fs9, 120(a1)
-; ILP32-NEXT:    flw fs10, 116(a1)
-; ILP32-NEXT:    flw fs11, 112(a1)
-; ILP32-NEXT:    fsw fs8, 124(a1)
-; ILP32-NEXT:    fsw fs9, 120(a1)
-; ILP32-NEXT:    fsw fs10, 116(a1)
-; ILP32-NEXT:    fsw fs11, 112(a1)
-; ILP32-NEXT:    fsw fs7, 108(a1)
-; ILP32-NEXT:    fsw fs6, 104(a1)
-; ILP32-NEXT:    fsw fs5, 100(a1)
-; ILP32-NEXT:    fsw fs4, 96(a1)
-; ILP32-NEXT:    fsw fs3, 92(a1)
-; ILP32-NEXT:    fsw fs2, 88(a1)
-; ILP32-NEXT:    fsw fs1, 84(a1)
-; ILP32-NEXT:    fsw fs0, 80(a1)
-; ILP32-NEXT:    fsw ft11, 76(a1)
-; ILP32-NEXT:    fsw ft10, 72(a1)
-; ILP32-NEXT:    fsw ft9, 68(a1)
-; ILP32-NEXT:    fsw ft8, 64(a1)
-; ILP32-NEXT:    fsw fa7, 60(a1)
-; ILP32-NEXT:    fsw fa6, 56(a1)
-; ILP32-NEXT:    fsw ft7, 52(a1)
-; ILP32-NEXT:    fsw ft6, 48(a1)
-; ILP32-NEXT:    fsw ft5, 44(a1)
-; ILP32-NEXT:    fsw ft4, 40(a1)
-; ILP32-NEXT:    fsw ft3, 36(a1)
-; ILP32-NEXT:    fsw ft2, 32(a1)
-; ILP32-NEXT:    fsw ft1, 28(a1)
-; ILP32-NEXT:    fsw ft0, 24(a1)
-; ILP32-NEXT:    fsw fa0, 20(a1)
-; ILP32-NEXT:    fsw fa1, 16(a1)
-; ILP32-NEXT:    fsw fa2, %lo(var+12)(a0)
-; ILP32-NEXT:    fsw fa3, %lo(var+8)(a0)
-; ILP32-NEXT:    fsw fa4, %lo(var+4)(a0)
-; ILP32-NEXT:    fsw fa5, %lo(var)(a0)
+; ILP32-NEXT:    addi a0, a0, %lo(var)
+; ILP32-NEXT:    flw fa5, 0(a0)
+; ILP32-NEXT:    flw fa4, 4(a0)
+; ILP32-NEXT:    flw fa3, 8(a0)
+; ILP32-NEXT:    flw fa2, 12(a0)
+; ILP32-NEXT:    flw fa1, 16(a0)
+; ILP32-NEXT:    flw fa0, 20(a0)
+; ILP32-NEXT:    flw ft0, 24(a0)
+; ILP32-NEXT:    flw ft1, 28(a0)
+; ILP32-NEXT:    flw ft2, 32(a0)
+; ILP32-NEXT:    flw ft3, 36(a0)
+; ILP32-NEXT:    flw ft4, 40(a0)
+; ILP32-NEXT:    flw ft5, 44(a0)
+; ILP32-NEXT:    flw ft6, 48(a0)
+; ILP32-NEXT:    flw ft7, 52(a0)
+; ILP32-NEXT:    flw fa6, 56(a0)
+; ILP32-NEXT:    flw fa7, 60(a0)
+; ILP32-NEXT:    flw ft8, 64(a0)
+; ILP32-NEXT:    flw ft9, 68(a0)
+; ILP32-NEXT:    flw ft10, 72(a0)
+; ILP32-NEXT:    flw ft11, 76(a0)
+; ILP32-NEXT:    flw fs0, 80(a0)
+; ILP32-NEXT:    flw fs1, 84(a0)
+; ILP32-NEXT:    flw fs2, 88(a0)
+; ILP32-NEXT:    flw fs3, 92(a0)
+; ILP32-NEXT:    flw fs4, 96(a0)
+; ILP32-NEXT:    flw fs5, 100(a0)
+; ILP32-NEXT:    flw fs6, 104(a0)
+; ILP32-NEXT:    flw fs7, 108(a0)
+; ILP32-NEXT:    flw fs8, 124(a0)
+; ILP32-NEXT:    flw fs9, 120(a0)
+; ILP32-NEXT:    flw fs10, 116(a0)
+; ILP32-NEXT:    flw fs11, 112(a0)
+; ILP32-NEXT:    fsw fs8, 124(a0)
+; ILP32-NEXT:    fsw fs9, 120(a0)
+; ILP32-NEXT:    fsw fs10, 116(a0)
+; ILP32-NEXT:    fsw fs11, 112(a0)
+; ILP32-NEXT:    fsw fs7, 108(a0)
+; ILP32-NEXT:    fsw fs6, 104(a0)
+; ILP32-NEXT:    fsw fs5, 100(a0)
+; ILP32-NEXT:    fsw fs4, 96(a0)
+; ILP32-NEXT:    fsw fs3, 92(a0)
+; ILP32-NEXT:    fsw fs2, 88(a0)
+; ILP32-NEXT:    fsw fs1, 84(a0)
+; ILP32-NEXT:    fsw fs0, 80(a0)
+; ILP32-NEXT:    fsw ft11, 76(a0)
+; ILP32-NEXT:    fsw ft10, 72(a0)
+; ILP32-NEXT:    fsw ft9, 68(a0)
+; ILP32-NEXT:    fsw ft8, 64(a0)
+; ILP32-NEXT:    fsw fa7, 60(a0)
+; ILP32-NEXT:    fsw fa6, 56(a0)
+; ILP32-NEXT:    fsw ft7, 52(a0)
+; ILP32-NEXT:    fsw ft6, 48(a0)
+; ILP32-NEXT:    fsw ft5, 44(a0)
+; ILP32-NEXT:    fsw ft4, 40(a0)
+; ILP32-NEXT:    fsw ft3, 36(a0)
+; ILP32-NEXT:    fsw ft2, 32(a0)
+; ILP32-NEXT:    fsw ft1, 28(a0)
+; ILP32-NEXT:    fsw ft0, 24(a0)
+; ILP32-NEXT:    fsw fa0, 20(a0)
+; ILP32-NEXT:    fsw fa1, 16(a0)
+; ILP32-NEXT:    fsw fa2, 12(a0)
+; ILP32-NEXT:    fsw fa3, 8(a0)
+; ILP32-NEXT:    fsw fa4, 4(a0)
+; ILP32-NEXT:    fsw fa5, 0(a0)
 ; ILP32-NEXT:    ret
 ;
 ; ILP32E-LABEL: callee:
 ; ILP32E:       # %bb.0:
 ; ILP32E-NEXT:    lui a0, %hi(var)
-; ILP32E-NEXT:    flw fa5, %lo(var)(a0)
-; ILP32E-NEXT:    flw fa4, %lo(var+4)(a0)
-; ILP32E-NEXT:    flw fa3, %lo(var+8)(a0)
-; ILP32E-NEXT:    flw fa2, %lo(var+12)(a0)
-; ILP32E-NEXT:    addi a1, a0, %lo(var)
-; ILP32E-NEXT:    flw fa1, 16(a1)
-; ILP32E-NEXT:    flw fa0, 20(a1)
-; ILP32E-NEXT:    flw ft0, 24(a1)
-; ILP32E-NEXT:    flw ft1, 28(a1)
-; ILP32E-NEXT:    flw ft2, 32(a1)
-; ILP32E-NEXT:    flw ft3, 36(a1)
-; ILP32E-NEXT:    flw ft4, 40(a1)
-; ILP32E-NEXT:    flw ft5, 44(a1)
-; ILP32E-NEXT:    flw ft6, 48(a1)
-; ILP32E-NEXT:    flw ft7, 52(a1)
-; ILP32E-NEXT:    flw fa6, 56(a1)
-; ILP32E-NEXT:    flw fa7, 60(a1)
-; ILP32E-NEXT:    flw ft8, 64(a1)
-; ILP32E-NEXT:    flw ft9, 68(a1)
-; ILP32E-NEXT:    flw ft10, 72(a1)
-; ILP32E-NEXT:    flw ft11, 76(a1)
-; ILP32E-NEXT:    flw fs0, 80(a1)
-; ILP32E-NEXT:    flw fs1, 84(a1)
-; ILP32E-NEXT:    flw fs2, 88(a1)
-; ILP32E-NEXT:    flw fs3, 92(a1)
-; ILP32E-NEXT:    flw fs4, 96(a1)
-; ILP32E-NEXT:    flw fs5, 100(a1)
-; ILP32E-NEXT:    flw fs6, 104(a1)
-; ILP32E-NEXT:    flw fs7, 108(a1)
-; ILP32E-NEXT:    flw fs8, 124(a1)
-; ILP32E-NEXT:    flw fs9, 120(a1)
-; ILP32E-NEXT:    flw fs10, 116(a1)
-; ILP32E-NEXT:    flw fs11, 112(a1)
-; ILP32E-NEXT:    fsw fs8, 124(a1)
-; ILP32E-NEXT:    fsw fs9, 120(a1)
-; ILP32E-NEXT:    fsw fs10, 116(a1)
-; ILP32E-NEXT:    fsw fs11, 112(a1)
-; ILP32E-NEXT:    fsw fs7, 108(a1)
-; ILP32E-NEXT:    fsw fs6, 104(a1)
-; ILP32E-NEXT:    fsw fs5, 100(a1)
-; ILP32E-NEXT:    fsw fs4, 96(a1)
-; ILP32E-NEXT:    fsw fs3, 92(a1)
-; ILP32E-NEXT:    fsw fs2, 88(a1)
-; ILP32E-NEXT:    fsw fs1, 84(a1)
-; ILP32E-NEXT:    fsw fs0, 80(a1)
-; ILP32E-NEXT:    fsw ft11, 76(a1)
-; ILP32E-NEXT:    fsw ft10, 72(a1)
-; ILP32E-NEXT:    fsw ft9, 68(a1)
-; ILP32E-NEXT:    fsw ft8, 64(a1)
-; ILP32E-NEXT:    fsw fa7, 60(a1)
-; ILP32E-NEXT:    fsw fa6, 56(a1)
-; ILP32E-NEXT:    fsw ft7, 52(a1)
-; ILP32E-NEXT:    fsw ft6, 48(a1)
-; ILP32E-NEXT:    fsw ft5, 44(a1)
-; ILP32E-NEXT:    fsw ft4, 40(a1)
-; ILP32E-NEXT:    fsw ft3, 36(a1)
-; ILP32E-NEXT:    fsw ft2, 32(a1)
-; ILP32E-NEXT:    fsw ft1, 28(a1)
-; ILP32E-NEXT:    fsw ft0, 24(a1)
-; ILP32E-NEXT:    fsw fa0, 20(a1)
-; ILP32E-NEXT:    fsw fa1, 16(a1)
-; ILP32E-NEXT:    fsw fa2, %lo(var+12)(a0)
-; ILP32E-NEXT:    fsw fa3, %lo(var+8)(a0)
-; ILP32E-NEXT:    fsw fa4, %lo(var+4)(a0)
-; ILP32E-NEXT:    fsw fa5, %lo(var)(a0)
+; ILP32E-NEXT:    addi a0, a0, %lo(var)
+; ILP32E-NEXT:    flw fa5, 0(a0)
+; ILP32E-NEXT:    flw fa4, 4(a0)
+; ILP32E-NEXT:    flw fa3, 8(a0)
+; ILP32E-NEXT:    flw fa2, 12(a0)
+; ILP32E-NEXT:    flw fa1, 16(a0)
+; ILP32E-NEXT:    flw fa0, 20(a0)
+; ILP32E-NEXT:    flw ft0, 24(a0)
+; ILP32E-NEXT:    flw ft1, 28(a0)
+; ILP32E-NEXT:    flw ft2, 32(a0)
+; ILP32E-NEXT:    flw ft3, 36(a0)
+; ILP32E-NEXT:    flw ft4, 40(a0)
+; ILP32E-NEXT:    flw ft5, 44(a0)
+; ILP32E-NEXT:    flw ft6, 48(a0)
+; ILP32E-NEXT:    flw ft7, 52(a0)
+; ILP32E-NEXT:    flw fa6, 56(a0)
+; ILP32E-NEXT:    flw fa7, 60(a0)
+; ILP32E-NEXT:    flw ft8, 64(a0)
+; ILP32E-NEXT:    flw ft9, 68(a0)
+; ILP32E-NEXT:    flw ft10, 72(a0)
+; ILP32E-NEXT:    flw ft11, 76(a0)
+; ILP32E-NEXT:    flw fs0, 80(a0)
+; ILP32E-NEXT:    flw fs1, 84(a0)
+; ILP32E-NEXT:    flw fs2, 88(a0)
+; ILP32E-NEXT:    flw fs3, 92(a0)
+; ILP32E-NEXT:    flw fs4, 96(a0)
+; ILP32E-NEXT:    flw fs5, 100(a0)
+; ILP32E-NEXT:    flw fs6, 1...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/93142


More information about the llvm-commits mailing list