[llvm] [RISCV] Restrict when we fold an ADD_LO into a load/store address. (PR #93129)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 22 20:28:18 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
Don't fold if there are loads/stores that use the ADD_LO with a non-zero immediate offset that can't be folded based on alignment.
This avoids cases where some loads/stores use the LUI directly and other loads/stores use the result of an ADDI that depends on the LUI.
This increases the latency to the load that we no longer fold, but reduces the need for a temporary register to hold the LUI result for multiple uses.
This is preparation for introducing a rematerializable LUI+ADDI pseudoinstruction.
---
Patch is 591.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93129.diff
21 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (+71-4)
- (modified) llvm/test/CodeGen/RISCV/bfloat-mem.ll (+5-5)
- (modified) llvm/test/CodeGen/RISCV/byval.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll (+1194-1210)
- (modified) llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll (+729-739)
- (modified) llvm/test/CodeGen/RISCV/callee-saved-gprs.ll (+1436-1474)
- (modified) llvm/test/CodeGen/RISCV/double-mem.ll (+19-19)
- (modified) llvm/test/CodeGen/RISCV/float-mem.ll (+10-10)
- (modified) llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll (+16-16)
- (modified) llvm/test/CodeGen/RISCV/global-merge-offset.ll (+7-7)
- (modified) llvm/test/CodeGen/RISCV/global-merge.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/half-mem.ll (+20-20)
- (modified) llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/mem.ll (+6-6)
- (modified) llvm/test/CodeGen/RISCV/mem64.ll (+6-6)
- (modified) llvm/test/CodeGen/RISCV/memcpy.ll (+70-70)
- (modified) llvm/test/CodeGen/RISCV/push-pop-popret.ll (+1434-1472)
- (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll (+6-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll (+6-6)
- (modified) llvm/test/CodeGen/RISCV/saverestore.ll (+2249-128)
- (modified) llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll (+4-4)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index d965dd4fc9a95..7c0908f18e28b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2457,6 +2457,61 @@ static bool isWorthFoldingAdd(SDValue Add) {
return true;
}
+// To prevent SelectAddrRegImm from folding offsets that conflicts with the
+// fusion of PseudoLIAddr, check if the offset of every use of a given address
+// is within the alignment
+static bool areUserOffsetsWithinAlignment(SDValue Addr, Align Alignment) {
+ for (auto *Use : Addr->uses()) {
+ if (!Use->isMachineOpcode()) {
+ // Don't allow stores of the value. It must be used as the address.
+ if (Use->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(Use)->getValue() == Addr)
+ return false;
+ if (Use->getOpcode() == ISD::ATOMIC_STORE &&
+ cast<AtomicSDNode>(Use)->getVal() == Addr)
+ return false;
+ // If the user is direct load/store, there is no offset.
+ if (Use->getOpcode() == ISD::LOAD || Use->getOpcode() == ISD::STORE ||
+ Use->getOpcode() == ISD::ATOMIC_LOAD ||
+ Use->getOpcode() == ISD::ATOMIC_STORE)
+ continue;
+ if (Use->getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(Use->getOperand(1)) &&
+ Alignment > cast<ConstantSDNode>(Use->getOperand(1))->getSExtValue())
+ continue;
+
+ return false;
+ }
+
+ // If user is already selected, get offsets from load/store instructions
+ unsigned int Opcode = Use->getMachineOpcode();
+ if (Opcode == RISCV::LB || Opcode == RISCV::LBU || Opcode == RISCV::LH ||
+ Opcode == RISCV::LHU || Opcode == RISCV::LW || Opcode == RISCV::LWU ||
+ Opcode == RISCV::LD || Opcode == RISCV::FLH || Opcode == RISCV::FLW ||
+ Opcode == RISCV::FLD) {
+ if (auto *Offset = dyn_cast<ConstantSDNode>(Use->getOperand(1))) {
+ if (Offset->isZero() || Alignment > Offset->getSExtValue())
+ continue;
+ }
+ return false;
+ }
+ if (Opcode == RISCV::SB || Opcode == RISCV::SH || Opcode == RISCV::SW ||
+ Opcode == RISCV::SD || Opcode == RISCV::FSH || Opcode == RISCV::FSW ||
+ Opcode == RISCV::FSD) {
+ // Also check if Addr is used as the value of store.
+ if (Use->getOperand(0) == Addr)
+ return false;
+ if (auto *Offset = dyn_cast<ConstantSDNode>(Use->getOperand(2))) {
+ if (Offset->isZero() || Alignment > Offset->getSExtValue())
+ continue;
+ }
+ return false;
+ }
+ return false;
+ }
+
+ return true;
+}
bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
unsigned MaxShiftAmount,
SDValue &Base, SDValue &Index,
@@ -2520,9 +2575,21 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
MVT VT = Addr.getSimpleValueType();
if (Addr.getOpcode() == RISCVISD::ADD_LO) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
+ bool CanFold = true;
+ // Unconditionally fold if operand 1 is not a global address (e.g.
+ // externsymbol)
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
+ const DataLayout &DL = CurDAG->getDataLayout();
+ Align Alignment = commonAlignment(
+ GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
+ if (!areUserOffsetsWithinAlignment(Addr, Alignment))
+ CanFold = false;
+ }
+ if (CanFold) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
}
int64_t RV32ZdinxRange = IsINX ? 4 : 0;
@@ -2541,7 +2608,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
const DataLayout &DL = CurDAG->getDataLayout();
Align Alignment = commonAlignment(
GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
- if (CVal == 0 || Alignment > CVal) {
+ if (areUserOffsetsWithinAlignment(Base, Alignment)) {
int64_t CombinedOffset = CVal + GA->getOffset();
Base = Base.getOperand(0);
Offset = CurDAG->getTargetGlobalAddress(
diff --git a/llvm/test/CodeGen/RISCV/bfloat-mem.ll b/llvm/test/CodeGen/RISCV/bfloat-mem.ll
index 4b6c0c29d660b..39340c85cfadc 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-mem.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-mem.ll
@@ -53,11 +53,11 @@ define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind {
; CHECK-NEXT: fadd.s fa5, fa4, fa5
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
; CHECK-NEXT: lui a0, %hi(G)
-; CHECK-NEXT: flh fa5, %lo(G)(a0)
-; CHECK-NEXT: addi a1, a0, %lo(G)
-; CHECK-NEXT: fsh fa0, %lo(G)(a0)
-; CHECK-NEXT: flh fa5, 18(a1)
-; CHECK-NEXT: fsh fa0, 18(a1)
+; CHECK-NEXT: addi a0, a0, %lo(G)
+; CHECK-NEXT: flh fa5, 0(a0)
+; CHECK-NEXT: fsh fa0, 0(a0)
+; CHECK-NEXT: flh fa5, 18(a0)
+; CHECK-NEXT: fsh fa0, 18(a0)
; CHECK-NEXT: ret
%1 = fadd bfloat %a, %b
%2 = load volatile bfloat, ptr @G
diff --git a/llvm/test/CodeGen/RISCV/byval.ll b/llvm/test/CodeGen/RISCV/byval.ll
index 9151f3b03e7c2..c5e48ee75e482 100644
--- a/llvm/test/CodeGen/RISCV/byval.ll
+++ b/llvm/test/CodeGen/RISCV/byval.ll
@@ -22,15 +22,15 @@ define void @caller() nounwind {
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, %hi(foo)
-; RV32I-NEXT: lw a1, %lo(foo)(a0)
-; RV32I-NEXT: sw a1, 12(sp)
; RV32I-NEXT: addi a0, a0, %lo(foo)
; RV32I-NEXT: lw a1, 12(a0)
; RV32I-NEXT: sw a1, 24(sp)
; RV32I-NEXT: lw a1, 8(a0)
; RV32I-NEXT: sw a1, 20(sp)
-; RV32I-NEXT: lw a0, 4(a0)
-; RV32I-NEXT: sw a0, 16(sp)
+; RV32I-NEXT: lw a1, 4(a0)
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: lw a0, 0(a0)
+; RV32I-NEXT: sw a0, 12(sp)
; RV32I-NEXT: addi a0, sp, 12
; RV32I-NEXT: call callee
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
index 2122b3fd91788..b525f9aa59c09 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
@@ -28,281 +28,281 @@ define void @callee() nounwind {
; ILP32-LABEL: callee:
; ILP32: # %bb.0:
; ILP32-NEXT: lui a0, %hi(var)
-; ILP32-NEXT: flw fa5, %lo(var)(a0)
-; ILP32-NEXT: flw fa4, %lo(var+4)(a0)
-; ILP32-NEXT: flw fa3, %lo(var+8)(a0)
-; ILP32-NEXT: flw fa2, %lo(var+12)(a0)
-; ILP32-NEXT: addi a1, a0, %lo(var)
-; ILP32-NEXT: flw fa1, 16(a1)
-; ILP32-NEXT: flw fa0, 20(a1)
-; ILP32-NEXT: flw ft0, 24(a1)
-; ILP32-NEXT: flw ft1, 28(a1)
-; ILP32-NEXT: flw ft2, 32(a1)
-; ILP32-NEXT: flw ft3, 36(a1)
-; ILP32-NEXT: flw ft4, 40(a1)
-; ILP32-NEXT: flw ft5, 44(a1)
-; ILP32-NEXT: flw ft6, 48(a1)
-; ILP32-NEXT: flw ft7, 52(a1)
-; ILP32-NEXT: flw fa6, 56(a1)
-; ILP32-NEXT: flw fa7, 60(a1)
-; ILP32-NEXT: flw ft8, 64(a1)
-; ILP32-NEXT: flw ft9, 68(a1)
-; ILP32-NEXT: flw ft10, 72(a1)
-; ILP32-NEXT: flw ft11, 76(a1)
-; ILP32-NEXT: flw fs0, 80(a1)
-; ILP32-NEXT: flw fs1, 84(a1)
-; ILP32-NEXT: flw fs2, 88(a1)
-; ILP32-NEXT: flw fs3, 92(a1)
-; ILP32-NEXT: flw fs4, 96(a1)
-; ILP32-NEXT: flw fs5, 100(a1)
-; ILP32-NEXT: flw fs6, 104(a1)
-; ILP32-NEXT: flw fs7, 108(a1)
-; ILP32-NEXT: flw fs8, 124(a1)
-; ILP32-NEXT: flw fs9, 120(a1)
-; ILP32-NEXT: flw fs10, 116(a1)
-; ILP32-NEXT: flw fs11, 112(a1)
-; ILP32-NEXT: fsw fs8, 124(a1)
-; ILP32-NEXT: fsw fs9, 120(a1)
-; ILP32-NEXT: fsw fs10, 116(a1)
-; ILP32-NEXT: fsw fs11, 112(a1)
-; ILP32-NEXT: fsw fs7, 108(a1)
-; ILP32-NEXT: fsw fs6, 104(a1)
-; ILP32-NEXT: fsw fs5, 100(a1)
-; ILP32-NEXT: fsw fs4, 96(a1)
-; ILP32-NEXT: fsw fs3, 92(a1)
-; ILP32-NEXT: fsw fs2, 88(a1)
-; ILP32-NEXT: fsw fs1, 84(a1)
-; ILP32-NEXT: fsw fs0, 80(a1)
-; ILP32-NEXT: fsw ft11, 76(a1)
-; ILP32-NEXT: fsw ft10, 72(a1)
-; ILP32-NEXT: fsw ft9, 68(a1)
-; ILP32-NEXT: fsw ft8, 64(a1)
-; ILP32-NEXT: fsw fa7, 60(a1)
-; ILP32-NEXT: fsw fa6, 56(a1)
-; ILP32-NEXT: fsw ft7, 52(a1)
-; ILP32-NEXT: fsw ft6, 48(a1)
-; ILP32-NEXT: fsw ft5, 44(a1)
-; ILP32-NEXT: fsw ft4, 40(a1)
-; ILP32-NEXT: fsw ft3, 36(a1)
-; ILP32-NEXT: fsw ft2, 32(a1)
-; ILP32-NEXT: fsw ft1, 28(a1)
-; ILP32-NEXT: fsw ft0, 24(a1)
-; ILP32-NEXT: fsw fa0, 20(a1)
-; ILP32-NEXT: fsw fa1, 16(a1)
-; ILP32-NEXT: fsw fa2, %lo(var+12)(a0)
-; ILP32-NEXT: fsw fa3, %lo(var+8)(a0)
-; ILP32-NEXT: fsw fa4, %lo(var+4)(a0)
-; ILP32-NEXT: fsw fa5, %lo(var)(a0)
+; ILP32-NEXT: addi a0, a0, %lo(var)
+; ILP32-NEXT: flw fa5, 0(a0)
+; ILP32-NEXT: flw fa4, 4(a0)
+; ILP32-NEXT: flw fa3, 8(a0)
+; ILP32-NEXT: flw fa2, 12(a0)
+; ILP32-NEXT: flw fa1, 16(a0)
+; ILP32-NEXT: flw fa0, 20(a0)
+; ILP32-NEXT: flw ft0, 24(a0)
+; ILP32-NEXT: flw ft1, 28(a0)
+; ILP32-NEXT: flw ft2, 32(a0)
+; ILP32-NEXT: flw ft3, 36(a0)
+; ILP32-NEXT: flw ft4, 40(a0)
+; ILP32-NEXT: flw ft5, 44(a0)
+; ILP32-NEXT: flw ft6, 48(a0)
+; ILP32-NEXT: flw ft7, 52(a0)
+; ILP32-NEXT: flw fa6, 56(a0)
+; ILP32-NEXT: flw fa7, 60(a0)
+; ILP32-NEXT: flw ft8, 64(a0)
+; ILP32-NEXT: flw ft9, 68(a0)
+; ILP32-NEXT: flw ft10, 72(a0)
+; ILP32-NEXT: flw ft11, 76(a0)
+; ILP32-NEXT: flw fs0, 80(a0)
+; ILP32-NEXT: flw fs1, 84(a0)
+; ILP32-NEXT: flw fs2, 88(a0)
+; ILP32-NEXT: flw fs3, 92(a0)
+; ILP32-NEXT: flw fs4, 96(a0)
+; ILP32-NEXT: flw fs5, 100(a0)
+; ILP32-NEXT: flw fs6, 104(a0)
+; ILP32-NEXT: flw fs7, 108(a0)
+; ILP32-NEXT: flw fs8, 124(a0)
+; ILP32-NEXT: flw fs9, 120(a0)
+; ILP32-NEXT: flw fs10, 116(a0)
+; ILP32-NEXT: flw fs11, 112(a0)
+; ILP32-NEXT: fsw fs8, 124(a0)
+; ILP32-NEXT: fsw fs9, 120(a0)
+; ILP32-NEXT: fsw fs10, 116(a0)
+; ILP32-NEXT: fsw fs11, 112(a0)
+; ILP32-NEXT: fsw fs7, 108(a0)
+; ILP32-NEXT: fsw fs6, 104(a0)
+; ILP32-NEXT: fsw fs5, 100(a0)
+; ILP32-NEXT: fsw fs4, 96(a0)
+; ILP32-NEXT: fsw fs3, 92(a0)
+; ILP32-NEXT: fsw fs2, 88(a0)
+; ILP32-NEXT: fsw fs1, 84(a0)
+; ILP32-NEXT: fsw fs0, 80(a0)
+; ILP32-NEXT: fsw ft11, 76(a0)
+; ILP32-NEXT: fsw ft10, 72(a0)
+; ILP32-NEXT: fsw ft9, 68(a0)
+; ILP32-NEXT: fsw ft8, 64(a0)
+; ILP32-NEXT: fsw fa7, 60(a0)
+; ILP32-NEXT: fsw fa6, 56(a0)
+; ILP32-NEXT: fsw ft7, 52(a0)
+; ILP32-NEXT: fsw ft6, 48(a0)
+; ILP32-NEXT: fsw ft5, 44(a0)
+; ILP32-NEXT: fsw ft4, 40(a0)
+; ILP32-NEXT: fsw ft3, 36(a0)
+; ILP32-NEXT: fsw ft2, 32(a0)
+; ILP32-NEXT: fsw ft1, 28(a0)
+; ILP32-NEXT: fsw ft0, 24(a0)
+; ILP32-NEXT: fsw fa0, 20(a0)
+; ILP32-NEXT: fsw fa1, 16(a0)
+; ILP32-NEXT: fsw fa2, 12(a0)
+; ILP32-NEXT: fsw fa3, 8(a0)
+; ILP32-NEXT: fsw fa4, 4(a0)
+; ILP32-NEXT: fsw fa5, 0(a0)
; ILP32-NEXT: ret
;
; ILP32E-LABEL: callee:
; ILP32E: # %bb.0:
; ILP32E-NEXT: lui a0, %hi(var)
-; ILP32E-NEXT: flw fa5, %lo(var)(a0)
-; ILP32E-NEXT: flw fa4, %lo(var+4)(a0)
-; ILP32E-NEXT: flw fa3, %lo(var+8)(a0)
-; ILP32E-NEXT: flw fa2, %lo(var+12)(a0)
-; ILP32E-NEXT: addi a1, a0, %lo(var)
-; ILP32E-NEXT: flw fa1, 16(a1)
-; ILP32E-NEXT: flw fa0, 20(a1)
-; ILP32E-NEXT: flw ft0, 24(a1)
-; ILP32E-NEXT: flw ft1, 28(a1)
-; ILP32E-NEXT: flw ft2, 32(a1)
-; ILP32E-NEXT: flw ft3, 36(a1)
-; ILP32E-NEXT: flw ft4, 40(a1)
-; ILP32E-NEXT: flw ft5, 44(a1)
-; ILP32E-NEXT: flw ft6, 48(a1)
-; ILP32E-NEXT: flw ft7, 52(a1)
-; ILP32E-NEXT: flw fa6, 56(a1)
-; ILP32E-NEXT: flw fa7, 60(a1)
-; ILP32E-NEXT: flw ft8, 64(a1)
-; ILP32E-NEXT: flw ft9, 68(a1)
-; ILP32E-NEXT: flw ft10, 72(a1)
-; ILP32E-NEXT: flw ft11, 76(a1)
-; ILP32E-NEXT: flw fs0, 80(a1)
-; ILP32E-NEXT: flw fs1, 84(a1)
-; ILP32E-NEXT: flw fs2, 88(a1)
-; ILP32E-NEXT: flw fs3, 92(a1)
-; ILP32E-NEXT: flw fs4, 96(a1)
-; ILP32E-NEXT: flw fs5, 100(a1)
-; ILP32E-NEXT: flw fs6, 104(a1)
-; ILP32E-NEXT: flw fs7, 108(a1)
-; ILP32E-NEXT: flw fs8, 124(a1)
-; ILP32E-NEXT: flw fs9, 120(a1)
-; ILP32E-NEXT: flw fs10, 116(a1)
-; ILP32E-NEXT: flw fs11, 112(a1)
-; ILP32E-NEXT: fsw fs8, 124(a1)
-; ILP32E-NEXT: fsw fs9, 120(a1)
-; ILP32E-NEXT: fsw fs10, 116(a1)
-; ILP32E-NEXT: fsw fs11, 112(a1)
-; ILP32E-NEXT: fsw fs7, 108(a1)
-; ILP32E-NEXT: fsw fs6, 104(a1)
-; ILP32E-NEXT: fsw fs5, 100(a1)
-; ILP32E-NEXT: fsw fs4, 96(a1)
-; ILP32E-NEXT: fsw fs3, 92(a1)
-; ILP32E-NEXT: fsw fs2, 88(a1)
-; ILP32E-NEXT: fsw fs1, 84(a1)
-; ILP32E-NEXT: fsw fs0, 80(a1)
-; ILP32E-NEXT: fsw ft11, 76(a1)
-; ILP32E-NEXT: fsw ft10, 72(a1)
-; ILP32E-NEXT: fsw ft9, 68(a1)
-; ILP32E-NEXT: fsw ft8, 64(a1)
-; ILP32E-NEXT: fsw fa7, 60(a1)
-; ILP32E-NEXT: fsw fa6, 56(a1)
-; ILP32E-NEXT: fsw ft7, 52(a1)
-; ILP32E-NEXT: fsw ft6, 48(a1)
-; ILP32E-NEXT: fsw ft5, 44(a1)
-; ILP32E-NEXT: fsw ft4, 40(a1)
-; ILP32E-NEXT: fsw ft3, 36(a1)
-; ILP32E-NEXT: fsw ft2, 32(a1)
-; ILP32E-NEXT: fsw ft1, 28(a1)
-; ILP32E-NEXT: fsw ft0, 24(a1)
-; ILP32E-NEXT: fsw fa0, 20(a1)
-; ILP32E-NEXT: fsw fa1, 16(a1)
-; ILP32E-NEXT: fsw fa2, %lo(var+12)(a0)
-; ILP32E-NEXT: fsw fa3, %lo(var+8)(a0)
-; ILP32E-NEXT: fsw fa4, %lo(var+4)(a0)
-; ILP32E-NEXT: fsw fa5, %lo(var)(a0)
+; ILP32E-NEXT: addi a0, a0, %lo(var)
+; ILP32E-NEXT: flw fa5, 0(a0)
+; ILP32E-NEXT: flw fa4, 4(a0)
+; ILP32E-NEXT: flw fa3, 8(a0)
+; ILP32E-NEXT: flw fa2, 12(a0)
+; ILP32E-NEXT: flw fa1, 16(a0)
+; ILP32E-NEXT: flw fa0, 20(a0)
+; ILP32E-NEXT: flw ft0, 24(a0)
+; ILP32E-NEXT: flw ft1, 28(a0)
+; ILP32E-NEXT: flw ft2, 32(a0)
+; ILP32E-NEXT: flw ft3, 36(a0)
+; ILP32E-NEXT: flw ft4, 40(a0)
+; ILP32E-NEXT: flw ft5, 44(a0)
+; ILP32E-NEXT: flw ft6, 48(a0)
+; ILP32E-NEXT: flw ft7, 52(a0)
+; ILP32E-NEXT: flw fa6, 56(a0)
+; ILP32E-NEXT: flw fa7, 60(a0)
+; ILP32E-NEXT: flw ft8, 64(a0)
+; ILP32E-NEXT: flw ft9, 68(a0)
+; ILP32E-NEXT: flw ft10, 72(a0)
+; ILP32E-NEXT: flw ft11, 76(a0)
+; ILP32E-NEXT: flw fs0, 80(a0)
+; ILP32E-NEXT: flw fs1, 84(a0)
+; ILP32E-NEXT: flw fs2, 88(a0)
+; ILP32E-NEXT: flw fs3, 92(a0)
+; ILP32E-NEXT: flw fs4, 96(a0)
+; ILP32E-NEXT: flw fs5, 100(a0)
+; ILP32E-NEXT: flw fs6, 104(a0)
+; ILP32E-NEXT: flw fs7, 108(a0)
+; ILP32E-NEXT: flw fs8, 124(a0)
+; ILP32E-NEXT: flw fs9, 120(a0)
+; ILP32E-NEXT: flw fs10, 116(a0)
+; ILP32E-NEXT: flw fs11, 112(a0)
+; ILP32E-NEXT: fsw fs8, 124(a0)
+; ILP32E-NEXT: fsw fs9, 120(a0)
+; ILP32E-NEXT: fsw fs10, 116(a0)
+; ILP32E-NEXT: fsw fs11, 112(a0)
+; ILP32E-NEXT: fsw fs7, 108(a0)
+; ILP32E-NEXT: fsw fs6, 104(a0)
+; ILP32E-NEXT: fsw fs5, 100(a0)
+; ILP32E-NEXT: fsw fs4, 96(a0)
+; ILP32E-NEXT: fsw fs3, 92(a0)
+; ILP32E-NEXT: fsw fs2, 88(a0)
+; ILP32E-NEXT: fsw fs1, 84(a0)
+; ILP32E-NEXT: fsw fs0, 80(a0)
+; ILP32E-NEXT: fsw ft11, 76(a0)
+; ILP32E-NEXT: fsw ft10, 72(a0)
+; ILP32E-NEXT: fsw ft9, 68(a0)
+; ILP32E-NEXT: fsw ft8, 64(a0)
+; ILP32E-NEXT: fsw fa7, 60(a0)
+; ILP32E-NEXT: fsw fa6, 56(a0)
+; ILP32E-NEXT: fsw ft7, 52(a0)
+; ILP32E-NEXT: fsw ft6, 48(a0)
+; ILP32E-NEXT: fsw ft5, 44(a0)
+; ILP32E-NEXT: fsw ft4, 40(a0)
+; ILP32E-NEXT: fsw ft3, 36(a0)
+; ILP32E-NEXT: fsw ft2, 32(a0)
+; ILP32E-NEXT: fsw ft1, 28(a0)
+; ILP32E-NEXT: fsw ft0, 24(a0)
+; ILP32E-NEXT: fsw fa0, 20(a0)
+; ILP32E-NEXT: fsw fa1, 16(a0)
+; ILP32E-NEXT: fsw fa2, 12(a0)
+; ILP32E-NEXT: fsw fa3, 8(a0)
+; ILP32E-NEXT: fsw fa4, 4(a0)
+; ILP32E-NEXT: fsw fa5, 0(a0)
; ILP32E-NEXT: ret
;
; LP64-LABEL: callee:
; LP64: # %bb.0:
; LP64-NEXT: lui a0, %hi(var)
-; LP64-NEXT: flw fa5, %lo(var)(a0)
-; LP64-NEXT: flw fa4, %lo(var+4)(a0)
-; LP64-NEXT: flw fa3, %lo(var+8)(a0)
-; LP64-NEXT: flw fa2, %lo(var+12)(a0)
-; LP64-NEXT: addi a1, a0, %lo(var)
-; LP64-NEXT: flw fa1, 16(a1)
-; LP64-NEXT: flw fa0, 20(a1)
-; LP64-NEXT: flw ft0, 24(a1)
-; LP64-NEXT: flw ft1, 28(a1)
-; LP64-NEXT: flw ft2, 32(a1)
-; LP64-NEXT: flw ft3, 36(a1)
-; LP64-NEXT: flw ft4, 40(a1)
-; LP64-NEXT: flw ft5, 44(a1)
-; LP64-NEXT: flw ft6, 48(a1)
-; LP64-NEXT: flw ft7, 52(a1)
-; LP64-NEXT: flw fa6, 56(a1)
-; LP64-NEXT: flw fa7, 60(a1)
-; LP64-NEXT: flw ft8, 64(a1)
-; LP64-NEXT: flw ft9, 68(a1)
-; LP64-NEXT: flw ft10, 72(a1)
-; LP64-NEXT: flw ft11, 76(a1)
-; LP64-NEXT: flw fs0, 80(a1)
-; LP64-NEXT: flw fs1, 84(a1)
-; LP64-NEXT: flw fs2, 88(a1)
-; LP64-NEXT: flw fs3, 92(a1)
-; LP64-NEXT: flw fs4, 96(a1)
-; LP64-NEXT: flw fs5, 100(a1)
-; LP64-NEXT: flw fs6, 104(a1)
-; LP64-NEXT: flw fs7, 108(a1)
-; LP64-NEXT: flw fs8, 124(a1)
-; LP64-NEXT: flw fs9, 120(a1)
-; LP64-NEXT: flw fs10, 116(a1)
-; LP64-NEXT: flw fs11, 112(a1)
-; LP64-NEXT: fsw fs8, 124(a1)
-; LP64-NEXT: fsw fs9, 120(a1)
-; LP64-NEXT: fsw fs10, 116(a1)
-; LP64-NEXT: fsw fs11, 112(a1)
-; LP64-NEXT: fsw fs7, 108(a1)
-; LP64-NEXT: fsw fs6, 104(a1)
-; LP64-NEXT: fsw fs5, 100(a1)
-; LP64-NEXT: fsw fs4, 96(a1)
-; LP64-NEXT: fsw fs3, 92(a1)
-; LP64-NEXT: fsw fs2, 88(a1)
-; LP64-NEXT: fsw fs1, 84(a1)
-; LP64-NEXT: fsw fs0, 80(a1)
-; LP64-NEXT: fsw ft11, 76(a1)
-; LP64-NEXT: fsw ft10, 72(a1)
-; LP64-NEXT: fsw ft9, 68(a1)
-; LP64-NEXT: fsw ft8, 64(a1)
-; LP64-NEXT: fsw fa7, 60(a1)
-; LP64-NEXT: fsw fa6, 56(a1)
-; LP64-NEXT: fsw ft7, 52(a1)
-; LP64-NEXT: fsw ft6, 48(a1)
-; LP64-NEXT: fsw ft5, 44(a1)
-; LP64-NEXT: fsw ft4, 40(a1)
-; LP64-NEXT: fsw ft3, 36(a1)
-; LP64-NEXT: fsw ft2, 32(a1)
-; LP64-NEXT: fsw ft1, 28(a1)
-; LP64-NEXT: fsw ft0, 24(a1)
-; LP64-NEXT: fsw fa0, 20(a1)
-; LP64-NEXT: fsw fa1, 16(a1)
-; LP64-NEXT: fsw fa2, %lo(var+12)(a0)
-; LP64-NEXT: fsw fa3, %lo(var+8)(a0)
-; LP64-NEXT: fsw fa4, %lo(var+4)(a0)
-; LP64-NEXT: fsw fa5, %lo(var)(a0)
+; LP64-NEXT: addi a0, a0, %lo(var)
+; LP64-NEXT: flw fa5, 0(a0)
+; LP64-NEXT: flw fa4, 4(a0)
+; LP64-NEXT: flw fa3, 8(a0)
+; LP64-NEXT: flw fa2, 12(a0)
+; LP64-NEXT: flw fa1, 16(a0)
+; LP64-NEXT: flw fa0, 20(a0)
+; LP64-NEXT: flw ft0, 24(a0)
+; LP64-NEXT: flw ft1, 28(a0)
+; LP64-NEXT: flw ft2, 32(a0)
+; LP64-NEXT: flw ft3, 36(a0)
+; LP64-NEXT: flw ft4, 40(a0)
+; LP64-NEXT: flw ft5, 44(a0)
+; LP64-NEXT: flw ft6, 48(a0)
+; LP64-NEXT: flw ft7, 52(a0)
+; LP64-NEXT: flw fa6, 56(a0)
+; LP64-NEXT: flw fa7, 60(a0)
+; LP64-NEXT: flw ft8, 64(a0)
+; LP64-NEXT: flw ft9, 68(a0)
+; LP64-NEXT: flw ft10, 72(a0)
+; LP64-NEXT: flw ft11, 76(a0)
+; LP64-NEXT: flw fs0, 80(a0)
+; LP64-NEXT: flw fs1, 84(a0)
+; LP64-NEXT: flw fs2, 88(a0)
+; LP64-NEXT: flw fs3, 92(a0)
+; LP64-NEXT: flw fs4, 96(a0)
+; LP64-NEXT: flw fs5, 100(a0)
+; LP64-NEXT: flw fs6, 104(a0)
+; LP64-NEXT: flw fs7, 108(a0)
+; LP64-NEXT: flw fs8, 124(a0)
+; LP64-NEXT: flw fs9, 120(a0)
+; LP64-NEXT: flw fs10, 116(a0)
+; LP64-NEXT: flw fs11, 112(a0)
+; LP64-NEXT: fsw fs8, 124(a0)
+; LP64-NEXT: fsw fs9, 120(a0)
+; LP64-NEXT: fsw fs10, 116(a0)
+; LP64-NEXT: fsw fs11, 112(a0)
+; LP64-NEXT: fsw fs7, 108(a0)
+; LP64-NEXT: fsw fs6, 104(a0)
+; LP64-NEXT: fsw fs5, 100(a0)
+; LP64-NEXT: fsw fs4, 96(a0)
+; LP64-NEXT: fsw fs3, 92(a0)
+; LP64-NEXT: fsw fs2, 88(a0)
+; LP64-NEXT: fsw fs1, 84(a0)
+; LP64-NEXT: fsw fs0, 80(a0)
+; LP64-NEXT: fsw ft11, 76(a0)
+; LP64-NEXT: fsw ft10, 72(a0)
+; LP64-NEXT: fsw ft9, 68(a0)
+; LP64-NEXT: fsw ft8, 64(a0)
+; LP64-NEXT: fsw fa7, 60(a0)
+; LP64-NEXT: fsw fa6, 56(a0)
+; LP64-NEXT: fsw ft7, 52(a0)
+; LP64-NEXT: fsw ft6, 48(a0)
+; LP64-NEXT: fsw ft5, 44(a0)
+; LP64-NEXT: fsw ft4, 40(a0)
+; LP64-NEXT: fsw ft3, 36(a0)
+; LP64-NEXT: fsw ft2, 32(a0)
+; LP64-NEXT: fsw ft1, 28(a0)
+; LP64-NEXT: fsw ft0, 24(a0)
+; LP64-NEXT: fsw fa0, 20(a0)
+; LP64-NEXT: fsw fa1, 16(a0)
+; LP64-NEXT: fsw fa2, 12(a0)
+; LP64-NEXT: fsw fa3, 8(a0)
+; LP64-NEXT: fsw fa4, 4(a0)
+; LP64-NEXT: fsw fa5, 0(a0)
; LP64-NEXT: ret
;
; LP64E-LABEL: callee:
; LP64E: # %bb.0:
; LP64E-NEXT: lui a0, %hi(var)
-; LP64E-NEXT: flw fa5, %lo(var)(a0)
-; LP64E-NEXT: flw fa4, %lo(var+4)(a0)
-; LP64E-NEXT: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/93129
More information about the llvm-commits
mailing list