[llvm] [RISCV][RFC] Prevent folding ADD_LO into load/store if we can't fold all uses. (PR #155935)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 28 15:35:41 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

<details>
<summary>Changes</summary>

If we don't fold all uses, we end up with an LUI that is used by an ADDI and some loads/stores. This requires the LUI to write a different register than the ADDI or the load/stores uses have to be scheduled between the LUI and ADDI. It prevents macrofusion of the LUI+ADDI on CPUs that support it. It prevents the use of PseudoMovAddr which prevents the LUI+ADDI from being rematerializable.

This is based on a patch we have had in our downstream for a while that we originally wrote because of macrofusion and rematerialization. I no longer have any relevant performance or code size numbers for it.

---

Patch is 919.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155935.diff


29 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (+71-4) 
- (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h (+2) 
- (modified) llvm/test/CodeGen/RISCV/bfloat-mem.ll (+5-5) 
- (modified) llvm/test/CodeGen/RISCV/byval.ll (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll (+1186-1202) 
- (modified) llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll (+724-734) 
- (modified) llvm/test/CodeGen/RISCV/callee-saved-gprs.ll (+1436-1474) 
- (modified) llvm/test/CodeGen/RISCV/double-mem.ll (+24-24) 
- (modified) llvm/test/CodeGen/RISCV/float-mem.ll (+10-10) 
- (modified) llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll (+10-10) 
- (modified) llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-nonzero.ll (+1-1) 
- (modified) llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-zero.ll (+1-1) 
- (modified) llvm/test/CodeGen/RISCV/global-merge-minsize.ll (+1-1) 
- (modified) llvm/test/CodeGen/RISCV/global-merge-offset.ll (+7-7) 
- (modified) llvm/test/CodeGen/RISCV/global-merge.ll (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/half-mem.ll (+20-20) 
- (modified) llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll (+2-2) 
- (modified) llvm/test/CodeGen/RISCV/mem.ll (+5-5) 
- (modified) llvm/test/CodeGen/RISCV/mem64.ll (+5-5) 
- (modified) llvm/test/CodeGen/RISCV/push-pop-popret.ll (+1451-1503) 
- (modified) llvm/test/CodeGen/RISCV/qci-interrupt-attr-fpr.ll (+672-680) 
- (modified) llvm/test/CodeGen/RISCV/qci-interrupt-attr.ll (+1328-1368) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll (+6-6) 
- (modified) llvm/test/CodeGen/RISCV/saverestore.ll (+24-24) 
- (modified) llvm/test/CodeGen/RISCV/xqccmp-callee-saved-gprs.ll (+503-523) 
- (modified) llvm/test/CodeGen/RISCV/xqccmp-push-pop-popret.ll (+1367-1413) 
- (modified) llvm/test/CodeGen/RISCV/xqcilsm-memset.ll (+14-14) 
- (modified) llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll (+55-55) 
- (modified) llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll (+4-4) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index f9f35f66319b5..c5b68ccd1ba8d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2951,6 +2951,60 @@ static bool isWorthFoldingAdd(SDValue Add) {
   return true;
 }
 
+bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
+  // If the user is a load or store, then the offset is 0.
+  if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
+      User->getOpcode() != RISCVISD::LD_RV32 &&
+      User->getOpcode() != RISCVISD::SD_RV32 &&
+      User->getOpcode() != ISD::ATOMIC_LOAD &&
+      User->getOpcode() != ISD::ATOMIC_STORE)
+    return false;
+
+  // Don't allow stores of the value. It must be used as the address.
+  if (User->getOpcode() == ISD::STORE &&
+      cast<StoreSDNode>(User)->getValue() == Add)
+    return false;
+  if (User->getOpcode() == RISCVISD::SD_RV32 &&
+      (User->getOperand(0) == Add || User->getOperand(1) == Add))
+    return false;
+  if (User->getOpcode() == ISD::ATOMIC_STORE &&
+      cast<AtomicSDNode>(User)->getVal() == Add)
+    return false;
+
+  return true;
+}
+
+// To prevent SelectAddrRegImm from folding offsets that conflicts with the
+// fusion of PseudoMovAddr, check if the offset of every use of a given address
+// is within the alignment.
+bool RISCVDAGToDAGISel::areOffsetsWithinAlignment(SDValue Addr,
+                                                  Align Alignment) {
+  assert(Addr->getOpcode() == RISCVISD::ADD_LO);
+  for (auto *User : Addr->users()) {
+    // If the user is a load or store, then the offset is 0 which is always
+    // within alignment.
+    if (isRegImmLoadOrStore(User, Addr))
+      continue;
+
+    if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
+      int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
+      if (!isInt<12>(CVal) || Alignment <= CVal)
+        return false;
+
+      // Make sure all uses are foldable load/stores.
+      for (auto *AddUser : User->users())
+        if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
+          return false;
+
+      continue;
+    }
+
+    return false;
+  }
+
+  return true;
+}
+
 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
   if (SelectAddrFrameIndex(Addr, Base, Offset))
@@ -2960,9 +3014,21 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
   MVT VT = Addr.getSimpleValueType();
 
   if (Addr.getOpcode() == RISCVISD::ADD_LO) {
-    Base = Addr.getOperand(0);
-    Offset = Addr.getOperand(1);
-    return true;
+    bool CanFold = true;
+    // Unconditionally fold if operand 1 is not a global address (e.g.
+    // externsymbol)
+    if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
+      const DataLayout &DL = CurDAG->getDataLayout();
+      Align Alignment = commonAlignment(
+          GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
+      if (!areOffsetsWithinAlignment(Addr, Alignment))
+        CanFold = false;
+    }
+    if (CanFold) {
+      Base = Addr.getOperand(0);
+      Offset = Addr.getOperand(1);
+      return true;
+    }
   }
 
   if (CurDAG->isBaseWithConstantOffset(Addr)) {
@@ -2980,7 +3046,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
           const DataLayout &DL = CurDAG->getDataLayout();
           Align Alignment = commonAlignment(
               GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
-          if ((CVal == 0 || Alignment > CVal)) {
+          if ((CVal == 0 || Alignment > CVal) &&
+              areOffsetsWithinAlignment(Base, Alignment)) {
             int64_t CombinedOffset = CVal + GA->getOffset();
             Base = Base.getOperand(0);
             Offset = CurDAG->getTargetGlobalAddress(
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index c329a4c6ec62e..89217e1487bbc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -45,6 +45,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
                                     InlineAsm::ConstraintCode ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
 
+  bool areOffsetsWithinAlignment(SDValue Addr, Align Alignment);
+
   bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset);
   bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
   bool SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset);
diff --git a/llvm/test/CodeGen/RISCV/bfloat-mem.ll b/llvm/test/CodeGen/RISCV/bfloat-mem.ll
index f9cf4e523b77d..cccbb04e6ae99 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-mem.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-mem.ll
@@ -51,13 +51,13 @@ define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind {
 ; CHECK-NEXT:    fcvt.s.bf16 fa5, fa1
 ; CHECK-NEXT:    fcvt.s.bf16 fa4, fa0
 ; CHECK-NEXT:    lui a0, %hi(G)
+; CHECK-NEXT:    addi a0, a0, %lo(G)
 ; CHECK-NEXT:    fadd.s fa5, fa4, fa5
-; CHECK-NEXT:    flh fa4, %lo(G)(a0)
 ; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK-NEXT:    addi a1, a0, %lo(G)
-; CHECK-NEXT:    fsh fa0, %lo(G)(a0)
-; CHECK-NEXT:    flh fa5, 18(a1)
-; CHECK-NEXT:    fsh fa0, 18(a1)
+; CHECK-NEXT:    flh fa5, 0(a0)
+; CHECK-NEXT:    fsh fa0, 0(a0)
+; CHECK-NEXT:    flh fa5, 18(a0)
+; CHECK-NEXT:    fsh fa0, 18(a0)
 ; CHECK-NEXT:    ret
   %1 = fadd bfloat %a, %b
   %2 = load volatile bfloat, ptr @G
diff --git a/llvm/test/CodeGen/RISCV/byval.ll b/llvm/test/CodeGen/RISCV/byval.ll
index 9151f3b03e7c2..c5e48ee75e482 100644
--- a/llvm/test/CodeGen/RISCV/byval.ll
+++ b/llvm/test/CodeGen/RISCV/byval.ll
@@ -22,15 +22,15 @@ define void @caller() nounwind {
 ; RV32I-NEXT:    addi sp, sp, -32
 ; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lui a0, %hi(foo)
-; RV32I-NEXT:    lw a1, %lo(foo)(a0)
-; RV32I-NEXT:    sw a1, 12(sp)
 ; RV32I-NEXT:    addi a0, a0, %lo(foo)
 ; RV32I-NEXT:    lw a1, 12(a0)
 ; RV32I-NEXT:    sw a1, 24(sp)
 ; RV32I-NEXT:    lw a1, 8(a0)
 ; RV32I-NEXT:    sw a1, 20(sp)
-; RV32I-NEXT:    lw a0, 4(a0)
-; RV32I-NEXT:    sw a0, 16(sp)
+; RV32I-NEXT:    lw a1, 4(a0)
+; RV32I-NEXT:    sw a1, 16(sp)
+; RV32I-NEXT:    lw a0, 0(a0)
+; RV32I-NEXT:    sw a0, 12(sp)
 ; RV32I-NEXT:    addi a0, sp, 12
 ; RV32I-NEXT:    call callee
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
index 337e9bc5845f9..2999a7e4981bc 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
@@ -28,281 +28,281 @@ define void @callee() nounwind {
 ; ILP32-LABEL: callee:
 ; ILP32:       # %bb.0:
 ; ILP32-NEXT:    lui a0, %hi(var)
-; ILP32-NEXT:    flw fa5, %lo(var)(a0)
-; ILP32-NEXT:    flw fa4, %lo(var+4)(a0)
-; ILP32-NEXT:    flw fa3, %lo(var+8)(a0)
-; ILP32-NEXT:    flw fa2, %lo(var+12)(a0)
-; ILP32-NEXT:    addi a1, a0, %lo(var)
-; ILP32-NEXT:    flw fa1, 16(a1)
-; ILP32-NEXT:    flw fa0, 20(a1)
-; ILP32-NEXT:    flw ft0, 24(a1)
-; ILP32-NEXT:    flw ft1, 28(a1)
-; ILP32-NEXT:    flw ft2, 32(a1)
-; ILP32-NEXT:    flw ft3, 36(a1)
-; ILP32-NEXT:    flw ft4, 40(a1)
-; ILP32-NEXT:    flw ft5, 44(a1)
-; ILP32-NEXT:    flw ft6, 48(a1)
-; ILP32-NEXT:    flw ft7, 52(a1)
-; ILP32-NEXT:    flw fa6, 56(a1)
-; ILP32-NEXT:    flw fa7, 60(a1)
-; ILP32-NEXT:    flw ft8, 64(a1)
-; ILP32-NEXT:    flw ft9, 68(a1)
-; ILP32-NEXT:    flw ft10, 72(a1)
-; ILP32-NEXT:    flw ft11, 76(a1)
-; ILP32-NEXT:    flw fs0, 80(a1)
-; ILP32-NEXT:    flw fs1, 84(a1)
-; ILP32-NEXT:    flw fs2, 88(a1)
-; ILP32-NEXT:    flw fs3, 92(a1)
-; ILP32-NEXT:    flw fs4, 112(a1)
-; ILP32-NEXT:    flw fs5, 116(a1)
-; ILP32-NEXT:    flw fs6, 120(a1)
-; ILP32-NEXT:    flw fs7, 124(a1)
-; ILP32-NEXT:    flw fs8, 96(a1)
-; ILP32-NEXT:    flw fs9, 100(a1)
-; ILP32-NEXT:    flw fs10, 104(a1)
-; ILP32-NEXT:    flw fs11, 108(a1)
-; ILP32-NEXT:    fsw fs7, 124(a1)
-; ILP32-NEXT:    fsw fs6, 120(a1)
-; ILP32-NEXT:    fsw fs5, 116(a1)
-; ILP32-NEXT:    fsw fs4, 112(a1)
-; ILP32-NEXT:    fsw fs11, 108(a1)
-; ILP32-NEXT:    fsw fs10, 104(a1)
-; ILP32-NEXT:    fsw fs9, 100(a1)
-; ILP32-NEXT:    fsw fs8, 96(a1)
-; ILP32-NEXT:    fsw fs3, 92(a1)
-; ILP32-NEXT:    fsw fs2, 88(a1)
-; ILP32-NEXT:    fsw fs1, 84(a1)
-; ILP32-NEXT:    fsw fs0, 80(a1)
-; ILP32-NEXT:    fsw ft11, 76(a1)
-; ILP32-NEXT:    fsw ft10, 72(a1)
-; ILP32-NEXT:    fsw ft9, 68(a1)
-; ILP32-NEXT:    fsw ft8, 64(a1)
-; ILP32-NEXT:    fsw fa7, 60(a1)
-; ILP32-NEXT:    fsw fa6, 56(a1)
-; ILP32-NEXT:    fsw ft7, 52(a1)
-; ILP32-NEXT:    fsw ft6, 48(a1)
-; ILP32-NEXT:    fsw ft5, 44(a1)
-; ILP32-NEXT:    fsw ft4, 40(a1)
-; ILP32-NEXT:    fsw ft3, 36(a1)
-; ILP32-NEXT:    fsw ft2, 32(a1)
-; ILP32-NEXT:    fsw ft1, 28(a1)
-; ILP32-NEXT:    fsw ft0, 24(a1)
-; ILP32-NEXT:    fsw fa0, 20(a1)
-; ILP32-NEXT:    fsw fa1, 16(a1)
-; ILP32-NEXT:    fsw fa2, %lo(var+12)(a0)
-; ILP32-NEXT:    fsw fa3, %lo(var+8)(a0)
-; ILP32-NEXT:    fsw fa4, %lo(var+4)(a0)
-; ILP32-NEXT:    fsw fa5, %lo(var)(a0)
+; ILP32-NEXT:    addi a0, a0, %lo(var)
+; ILP32-NEXT:    flw fa5, 0(a0)
+; ILP32-NEXT:    flw fa4, 4(a0)
+; ILP32-NEXT:    flw fa3, 8(a0)
+; ILP32-NEXT:    flw fa2, 12(a0)
+; ILP32-NEXT:    flw fa1, 16(a0)
+; ILP32-NEXT:    flw fa0, 20(a0)
+; ILP32-NEXT:    flw ft0, 24(a0)
+; ILP32-NEXT:    flw ft1, 28(a0)
+; ILP32-NEXT:    flw ft2, 32(a0)
+; ILP32-NEXT:    flw ft3, 36(a0)
+; ILP32-NEXT:    flw ft4, 40(a0)
+; ILP32-NEXT:    flw ft5, 44(a0)
+; ILP32-NEXT:    flw ft6, 48(a0)
+; ILP32-NEXT:    flw ft7, 52(a0)
+; ILP32-NEXT:    flw fa6, 56(a0)
+; ILP32-NEXT:    flw fa7, 60(a0)
+; ILP32-NEXT:    flw ft8, 64(a0)
+; ILP32-NEXT:    flw ft9, 68(a0)
+; ILP32-NEXT:    flw ft10, 72(a0)
+; ILP32-NEXT:    flw ft11, 76(a0)
+; ILP32-NEXT:    flw fs0, 80(a0)
+; ILP32-NEXT:    flw fs1, 84(a0)
+; ILP32-NEXT:    flw fs2, 88(a0)
+; ILP32-NEXT:    flw fs3, 92(a0)
+; ILP32-NEXT:    flw fs4, 112(a0)
+; ILP32-NEXT:    flw fs5, 116(a0)
+; ILP32-NEXT:    flw fs6, 120(a0)
+; ILP32-NEXT:    flw fs7, 124(a0)
+; ILP32-NEXT:    flw fs8, 96(a0)
+; ILP32-NEXT:    flw fs9, 100(a0)
+; ILP32-NEXT:    flw fs10, 104(a0)
+; ILP32-NEXT:    flw fs11, 108(a0)
+; ILP32-NEXT:    fsw fs7, 124(a0)
+; ILP32-NEXT:    fsw fs6, 120(a0)
+; ILP32-NEXT:    fsw fs5, 116(a0)
+; ILP32-NEXT:    fsw fs4, 112(a0)
+; ILP32-NEXT:    fsw fs11, 108(a0)
+; ILP32-NEXT:    fsw fs10, 104(a0)
+; ILP32-NEXT:    fsw fs9, 100(a0)
+; ILP32-NEXT:    fsw fs8, 96(a0)
+; ILP32-NEXT:    fsw fs3, 92(a0)
+; ILP32-NEXT:    fsw fs2, 88(a0)
+; ILP32-NEXT:    fsw fs1, 84(a0)
+; ILP32-NEXT:    fsw fs0, 80(a0)
+; ILP32-NEXT:    fsw ft11, 76(a0)
+; ILP32-NEXT:    fsw ft10, 72(a0)
+; ILP32-NEXT:    fsw ft9, 68(a0)
+; ILP32-NEXT:    fsw ft8, 64(a0)
+; ILP32-NEXT:    fsw fa7, 60(a0)
+; ILP32-NEXT:    fsw fa6, 56(a0)
+; ILP32-NEXT:    fsw ft7, 52(a0)
+; ILP32-NEXT:    fsw ft6, 48(a0)
+; ILP32-NEXT:    fsw ft5, 44(a0)
+; ILP32-NEXT:    fsw ft4, 40(a0)
+; ILP32-NEXT:    fsw ft3, 36(a0)
+; ILP32-NEXT:    fsw ft2, 32(a0)
+; ILP32-NEXT:    fsw ft1, 28(a0)
+; ILP32-NEXT:    fsw ft0, 24(a0)
+; ILP32-NEXT:    fsw fa0, 20(a0)
+; ILP32-NEXT:    fsw fa1, 16(a0)
+; ILP32-NEXT:    fsw fa2, 12(a0)
+; ILP32-NEXT:    fsw fa3, 8(a0)
+; ILP32-NEXT:    fsw fa4, 4(a0)
+; ILP32-NEXT:    fsw fa5, 0(a0)
 ; ILP32-NEXT:    ret
 ;
 ; ILP32E-LABEL: callee:
 ; ILP32E:       # %bb.0:
 ; ILP32E-NEXT:    lui a0, %hi(var)
-; ILP32E-NEXT:    flw fa5, %lo(var)(a0)
-; ILP32E-NEXT:    flw fa4, %lo(var+4)(a0)
-; ILP32E-NEXT:    flw fa3, %lo(var+8)(a0)
-; ILP32E-NEXT:    flw fa2, %lo(var+12)(a0)
-; ILP32E-NEXT:    addi a1, a0, %lo(var)
-; ILP32E-NEXT:    flw fa1, 16(a1)
-; ILP32E-NEXT:    flw fa0, 20(a1)
-; ILP32E-NEXT:    flw ft0, 24(a1)
-; ILP32E-NEXT:    flw ft1, 28(a1)
-; ILP32E-NEXT:    flw ft2, 32(a1)
-; ILP32E-NEXT:    flw ft3, 36(a1)
-; ILP32E-NEXT:    flw ft4, 40(a1)
-; ILP32E-NEXT:    flw ft5, 44(a1)
-; ILP32E-NEXT:    flw ft6, 48(a1)
-; ILP32E-NEXT:    flw ft7, 52(a1)
-; ILP32E-NEXT:    flw fa6, 56(a1)
-; ILP32E-NEXT:    flw fa7, 60(a1)
-; ILP32E-NEXT:    flw ft8, 64(a1)
-; ILP32E-NEXT:    flw ft9, 68(a1)
-; ILP32E-NEXT:    flw ft10, 72(a1)
-; ILP32E-NEXT:    flw ft11, 76(a1)
-; ILP32E-NEXT:    flw fs0, 80(a1)
-; ILP32E-NEXT:    flw fs1, 84(a1)
-; ILP32E-NEXT:    flw fs2, 88(a1)
-; ILP32E-NEXT:    flw fs3, 92(a1)
-; ILP32E-NEXT:    flw fs4, 112(a1)
-; ILP32E-NEXT:    flw fs5, 116(a1)
-; ILP32E-NEXT:    flw fs6, 120(a1)
-; ILP32E-NEXT:    flw fs7, 124(a1)
-; ILP32E-NEXT:    flw fs8, 96(a1)
-; ILP32E-NEXT:    flw fs9, 100(a1)
-; ILP32E-NEXT:    flw fs10, 104(a1)
-; ILP32E-NEXT:    flw fs11, 108(a1)
-; ILP32E-NEXT:    fsw fs7, 124(a1)
-; ILP32E-NEXT:    fsw fs6, 120(a1)
-; ILP32E-NEXT:    fsw fs5, 116(a1)
-; ILP32E-NEXT:    fsw fs4, 112(a1)
-; ILP32E-NEXT:    fsw fs11, 108(a1)
-; ILP32E-NEXT:    fsw fs10, 104(a1)
-; ILP32E-NEXT:    fsw fs9, 100(a1)
-; ILP32E-NEXT:    fsw fs8, 96(a1)
-; ILP32E-NEXT:    fsw fs3, 92(a1)
-; ILP32E-NEXT:    fsw fs2, 88(a1)
-; ILP32E-NEXT:    fsw fs1, 84(a1)
-; ILP32E-NEXT:    fsw fs0, 80(a1)
-; ILP32E-NEXT:    fsw ft11, 76(a1)
-; ILP32E-NEXT:    fsw ft10, 72(a1)
-; ILP32E-NEXT:    fsw ft9, 68(a1)
-; ILP32E-NEXT:    fsw ft8, 64(a1)
-; ILP32E-NEXT:    fsw fa7, 60(a1)
-; ILP32E-NEXT:    fsw fa6, 56(a1)
-; ILP32E-NEXT:    fsw ft7, 52(a1)
-; ILP32E-NEXT:    fsw ft6, 48(a1)
-; ILP32E-NEXT:    fsw ft5, 44(a1)
-; ILP32E-NEXT:    fsw ft4, 40(a1)
-; ILP32E-NEXT:    fsw ft3, 36(a1)
-; ILP32E-NEXT:    fsw ft2, 32(a1)
-; ILP32E-NEXT:    fsw ft1, 28(a1)
-; ILP32E-NEXT:    fsw ft0, 24(a1)
-; ILP32E-NEXT:    fsw fa0, 20(a1)
-; ILP32E-NEXT:    fsw fa1, 16(a1)
-; ILP32E-NEXT:    fsw fa2, %lo(var+12)(a0)
-; ILP32E-NEXT:    fsw fa3, %lo(var+8)(a0)
-; ILP32E-NEXT:    fsw fa4, %lo(var+4)(a0)
-; ILP32E-NEXT:    fsw fa5, %lo(var)(a0)
+; ILP32E-NEXT:    addi a0, a0, %lo(var)
+; ILP32E-NEXT:    flw fa5, 0(a0)
+; ILP32E-NEXT:    flw fa4, 4(a0)
+; ILP32E-NEXT:    flw fa3, 8(a0)
+; ILP32E-NEXT:    flw fa2, 12(a0)
+; ILP32E-NEXT:    flw fa1, 16(a0)
+; ILP32E-NEXT:    flw fa0, 20(a0)
+; ILP32E-NEXT:    flw ft0, 24(a0)
+; ILP32E-NEXT:    flw ft1, 28(a0)
+; ILP32E-NEXT:    flw ft2, 32(a0)
+; ILP32E-NEXT:    flw ft3, 36(a0)
+; ILP32E-NEXT:    flw ft4, 40(a0)
+; ILP32E-NEXT:    flw ft5, 44(a0)
+; ILP32E-NEXT:    flw ft6, 48(a0)
+; ILP32E-NEXT:    flw ft7, 52(a0)
+; ILP32E-NEXT:    flw fa6, 56(a0)
+; ILP32E-NEXT:    flw fa7, 60(a0)
+; ILP32E-NEXT:    flw ft8, 64(a0)
+; ILP32E-NEXT:    flw ft9, 68(a0)
+; ILP32E-NEXT:    flw ft10, 72(a0)
+; ILP32E-NEXT:    flw ft11, 76(a0)
+; ILP32E-NEXT:    flw fs0, 80(a0)
+; ILP32E-NEXT:    flw fs1, 84(a0)
+; ILP32E-NEXT:    flw fs2, 88(a0)
+; ILP32E-NEXT:    flw fs3, 92(a0)
+; ILP32E-NEXT:    flw fs4, 112(a0)
+; ILP32E-NEXT:    flw fs5, 116(a0)
+; ILP32E-NEXT:    flw fs6, 120(a0)
+; ILP32E-NEXT:    flw fs7, 124(a0)
+; ILP32E-NEXT:    flw fs8, 96(a0)
+; ILP32E-NEXT:    flw fs9, 100(a0)
+; ILP32E-NEXT:    flw fs10, 104(a0)
+; ILP32E-NEXT:    flw fs11, 108(a0)
+; ILP32E-NEXT:    fsw fs7, 124(a0)
+; ILP32E-NEXT:    fsw fs6, 120(a0)
+; ILP32E-NEXT:    fsw fs5, 116(a0)
+; ILP32E-NEXT:    fsw fs4, 112(a0)
+; ILP32E-NEXT:    fsw fs11, 108(a0)
+; ILP32E-NEXT:    fsw fs10, 104(a0)
+; ILP32E-NEXT:    fsw fs9, 100(a0)
+; ILP32E-NEXT:    fsw fs8, 96(a0)
+; ILP32E-NEXT:    fsw fs3, 92(a0)
+; ILP32E-NEXT:    fsw fs2, 88(a0)
+; ILP32E-NEXT:    fsw fs1, 84(a0)
+; ILP32E-NEXT:    fsw fs0, 80(a0)
+; ILP32E-NEXT:    fsw ft11, 76(a0)
+; ILP32E-NEXT:    fsw ft10, 72(a0)
+; ILP32E-NEXT:    fsw ft9, 68(a0)
+; ILP32E-NEXT:    fsw ft8, 64(a0)
+; ILP32E-NEXT:    fsw fa7, 60(a0)
+; ILP32E-NEXT:    fsw fa6, 56(a0)
+; ILP32E-NEXT:    fsw ft7, 52(a0)
+; ILP32E-NEXT:    fsw ft6, 48(a0)
+; ILP32E-NEXT:    fsw ft5, 44(a0)
+; ILP32E-NEXT:    fsw ft4, 40(a0)
+; ILP32E-NEXT:    fsw ft3, 36(a0)
+; ILP32E-NEXT:    fsw ft2, 32(a0)
+; ILP32E-NEXT:    fsw ft1, 28(a0)
+; ILP32E-NEXT:    fsw ft0, 24(a0)
+; ILP32E-NEXT:    fsw fa0, 20(a0)
+; ILP32E-NEXT:    fsw fa1, 16(a0)
+; ILP32E-NEXT:    fsw fa2, 12(a0)
+; ILP32E-NEXT:    fsw fa3, 8(a0)
+; ILP32E-NEXT:    fsw fa4, 4(a0)
+; ILP32E-NEXT:    fsw fa5, 0(a0)
 ; ILP32E-NEXT:    ret
 ;
 ; LP64-LABEL: callee:
 ; LP64:       # %bb.0:
 ; LP64-NEXT:    lui a0, %hi(var)
-; LP64-NEXT:    flw fa5, %lo(var)(a0)
-; LP64-NEXT:    flw fa4, %lo(var+4)(a0)
-; LP64-NEXT:    flw fa3, %lo(var+8)(a0)
-; LP64-NEXT:    flw fa2, %lo(var+12)(a0)
-; LP64-NEXT:    addi a1, a0, %lo(var)
-; LP64-NEXT:    flw fa1, 16(a1)
-; LP64-NEXT:    flw fa0, 20(a1)
-; LP64-NEXT:    flw ft0, 24(a1)
-; LP64-NEXT:    flw ft1, 28(a1)
-; LP64-NEXT:    flw ft2, 32(a1)
-; LP64-NEXT:    flw ft3, 36(a1)
-; LP64-NEXT:    flw ft4, 40(a1)
-; LP64-NEXT:    flw ft5, 44(a1)
-; LP64-NEXT:    flw ft6, 48(a1)
-; LP64-NEXT:    flw ft7, 52(a1)
-; LP64-NEXT:    flw fa6, 56(a1)
-; LP64-NEXT:    flw fa7, 60(a1)
-; LP64-NEXT:    flw ft8, 64(a1)
-; LP64-NEXT:    flw ft9, 68(a1)
-; LP64-NEXT:    flw ft10, 72(a1)
-; LP64-NEXT:    flw ft11, 76(a1)
-; LP64-NEXT:    flw fs0, 80(a1)
-; LP64-NEXT:    flw fs1, 84(a1)
-; LP64-NEXT:    flw fs2, 88(a1)
-; LP64-NEXT:    flw fs3, 92(a1)
-; LP64-NEXT:    flw fs4, 112(a1)
-; LP64-NEXT:    flw fs5, 116(a1)
-; LP64-NEXT:    flw fs6, 120(a1)
-; LP64-NEXT:    flw fs7, 124(a1)
-; LP64-NEXT:    flw fs8, 96(a1)
-; LP64-NEXT:    flw fs9, 100(a1)
-; LP64-NEXT:    flw fs10, 104(a1)
-; LP64-NEXT:    flw fs11, 108(a1)
-; LP64-NEXT:    fsw fs7, 124(a1)
-; LP64-NEXT:    fsw fs6, 120(a1)
-; LP64-NEXT:    fsw fs5, 116(a1)
-; LP64-NEXT:    fsw fs4, 112(a1)
-; LP64-NEXT:    fsw fs11, 108(a1)
-; LP64-NEXT:    fsw fs10, 104(a1)
-; LP64-NEXT:    fsw fs9, 100(a1)
-; LP64-NEXT:    fsw fs8, 96(a1)
-; LP64-NEXT:    fsw fs3, 92(a1)
-; LP64-NEXT:    fsw fs2, 88(a1)
-; LP64-NEXT:    fsw fs1, 84(a1)
-; LP64-NEXT:    fsw fs0, 80(a1)
-; LP64-NEXT:    fsw ft11, 76(a1)
-; LP64-NEXT:    fsw ft10, 72(a1)
-; LP64-NEXT:    fsw ft9, 68(a1)
-; LP64-NEXT:    fsw ft8, 64(a1)
-; LP64-NEXT:    fsw fa7, 60(a1)
-; LP64-NEXT:    fsw fa6, 56(a1)
-; LP64-NEXT:    fsw ft7, 52(a1)
-; LP64-NEXT:    fsw ft6, 48(a1)
-; LP64-NEXT:    fsw ft5, 44(a1)
-; LP64-NEXT:    fsw ft4, 40(a1)
-; LP64-NEXT:    fsw ft3, 36(a1)
-; LP64-NEXT:    fsw ft2, 32(a1)
-; LP64-NEXT:    fsw ft1, 28(a1)
-; LP64-NEXT:    fsw ft0, 24(a1)
-; LP64-NEXT:    fsw fa0, 20(a1)
-; LP64-NEXT:    fsw fa1, 16(a1)
-; LP64-NEXT:    fsw fa2, %lo(var+12)(a0)
-; LP64-NEXT:    fsw fa3, %lo(var+8)(a0)
-; LP64-NEXT:    fsw fa4, %lo(var+4)(a0)
-; LP64-NEXT:    fsw fa5, %lo(var)(a0)
+; LP64-NEXT:    addi a0, a0, %lo(var)
+; LP64-NEXT:    flw fa5, 0(a0)
+; LP64-NEXT:    flw fa4, 4(a0)
+; LP64-NEXT:    flw fa3, 8(a0)
+; LP64-NEXT:    flw fa2, 12(a0)
+; LP64-NEXT:    flw fa1, 16(a0)
+; LP64-NEXT:    flw fa0, 20(a0)
+; LP64-NEXT:    flw ft0, 24(a0)
+; LP64-NEXT:    flw ft1, 28(a0)
+; LP64-NEXT:    flw ft2, 32(a0)
+; LP64-NEXT:    flw ft3, 36(a0)
+; LP64-NEXT:    flw ft4, 40(a0)
+; LP64-NEXT:    flw ft5, 44(a0)
+; LP64-NEXT:    flw ft6, 48(a0)
+; LP64-NEXT:    flw ft7, 52(a0)
+; LP64-NEXT:    flw fa6, 56(a0)
+; LP64-NEXT:    flw fa7, 60(a0)
+; LP64-NEXT:    flw ft8, 64(a0)
+; LP64-NEXT:    flw ft9, 68(a0)
+; LP64-NEXT:    flw ft10, 72(a0)
+; LP64-NEXT:    flw ft11, 76(a0)
+; LP64-NEXT:    flw fs0, 80(a0)
+; LP64-NEXT:    flw fs1, 84(a0)
+; LP64-NEXT:    flw fs2, 88(a0)
+; LP64-NEXT:    flw fs3, 92(a0)
+; LP64-NEXT:    flw fs4, 112(a0)
+; LP64-NEXT:    flw fs5, 116(a0)
+; LP64-NEXT:    flw fs6, 120(a0)
+; LP64-NEXT:    flw fs7, 124(a0)
+; LP64-NEXT:    flw fs8, 96(a0)
+; LP64-NEXT:    flw fs9, 100(a0)
+; LP64-NEXT:    flw fs10, 104(a0)
+; LP64-NEXT:    flw fs11, 108(a0)
+; LP64-NEXT:    fsw fs7, 124(a0)
+; LP64-NEXT:    fsw fs6, 120(a0)
+; LP64-NEXT:    fsw fs5, 116(a0)
+; LP64-NEXT:    fsw fs4, 112(a0)
+; LP64-NEXT:    fsw fs11, 108(a0)
+; LP64-NEXT:    fsw fs10, 104(a0)
+; LP64-NEXT:    fsw fs9, 100(a0)
+; LP64-NEXT:    fsw fs8, 96(a0)
+; LP64-NEXT:    fsw fs3, 92(a0)
+; LP64-NEXT:    fsw fs2, 88(a0)
+; LP64-NEXT:    fsw fs1, 84(a0)
+; LP64-NEXT:    fsw fs0, 80(a0)
+; LP64-NEXT:    fsw ft11, 76(a0)
+; LP64-NEXT:    fsw ft10, 72(a0)
+; LP64-NEXT:    fsw ft9, 68(a0)
+; LP64-NEXT:    fsw ft8, 64(a0)
+; LP64-NEXT:    fsw fa7, 60(a0)
+; LP64-NEXT:    fsw fa6, 56(a0)
+; LP64-NEXT:    fsw ft7, 52(a0)
+; LP64-NEXT:    fsw ft6, 48(a0)
+; LP64-NEXT:    fsw ft5, 44(a0)
+; LP64-NEXT:    fsw ft4, 40(a0)
+; LP64-NEXT:    fsw ft3, 36(a0)
+; LP64-NEXT:    fsw ft2, 32(a0)
+; LP64-NEXT:...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/155935


More information about the llvm-commits mailing list