[llvm] [RISCV] Use Zilsd Pseudos in ISel (PR #169580)
Sam Elliott via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 28 13:35:59 PDT 2026
https://github.com/lenary updated https://github.com/llvm/llvm-project/pull/169580
>From 386a4971ae9ddd5170f05976ce27efa533853ea9 Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Tue, 25 Nov 2025 14:54:36 -0800
Subject: [PATCH 01/16] [RISCV] Use Zilsd Pseudos in ISel
This is proposed as an alternative to #169529. The idea here is, during
selection, to choose between directly generating `LD`/`SD` or generating
`PseudoLD_RV32_OPT`/`PseudoSD_RV32_OPT` based on the volatility of the
access.
Volatile operations will always become `LD`/`SD`, but non-volatile
operations have a chance of becoming a pair of `LW`/`SW` depending on
the register allocation, which might save some `MV` instructions.
The advantage of this approach is that we don't need to go searching for
instructions to pair (including comparing their memory operands) in the
pre-RA pass — we already know these are paired — and, unlike `LD`/`SD`,
they don't constrain the register allocator.
This PR is maybe not enough - we probably have to check that the passes
between ISel and the Pre-RA Load/Store Pairing pass cope with this
correctly.
This also fixes a verifier error with the kill flags.
---
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 79 ++++++++++++-------
.../Target/RISCV/RISCVLoadStoreOptimizer.cpp | 3 +-
llvm/test/CodeGen/RISCV/zilsd.ll | 42 +++++-----
3 files changed, 73 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 5025122db3681..f00ec4e57e46b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1817,52 +1817,77 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
case RISCVISD::LD_RV32: {
assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
+ auto *MemNode = cast<MemSDNode>(Node);
+
SDValue Base, Offset;
- SDValue Chain = Node->getOperand(0);
- SDValue Addr = Node->getOperand(1);
+ SDValue Chain = MemNode->getChain();
+ SDValue Addr = MemNode->getBasePtr();
SelectAddrRegImm(Addr, Base, Offset);
SDValue Ops[] = {Base, Offset, Chain};
- MachineSDNode *New = CurDAG->getMachineNode(
- RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
- SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
- MVT::i32, SDValue(New, 0));
- SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
- MVT::i32, SDValue(New, 0));
- CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
+ MachineSDNode *New;
+ SDValue Lo, Hi, OutChain;
+ if (MemNode->isVolatile()) {
+ New = CurDAG->getMachineNode(RISCV::LD_RV32, DL,
+ {MVT::Untyped, MVT::Other}, Ops);
+
+ Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, MVT::i32,
+ SDValue(New, 0));
+ Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, MVT::i32,
+ SDValue(New, 0));
+ OutChain = SDValue(New, 1);
+ } else {
+ New = CurDAG->getMachineNode(RISCV::PseudoLD_RV32_OPT, DL,
+ {MVT::i32, MVT::i32, MVT::Other}, Ops);
+ Lo = SDValue(New, 0);
+ Hi = SDValue(New, 1);
+ OutChain = SDValue(New, 2);
+ }
+
+ CurDAG->setNodeMemRefs(New, {MemNode->getMemOperand()});
ReplaceUses(SDValue(Node, 0), Lo);
ReplaceUses(SDValue(Node, 1), Hi);
- ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
+ ReplaceUses(SDValue(Node, 2), OutChain);
CurDAG->RemoveDeadNode(Node);
return;
}
case RISCVISD::SD_RV32: {
+ auto *MemNode = cast<MemSDNode>(Node);
+
SDValue Base, Offset;
- SDValue Chain = Node->getOperand(0);
- SDValue Addr = Node->getOperand(3);
+ SDValue Chain = MemNode->getChain();
+ SDValue Addr = MemNode->getBasePtr();
SelectAddrRegImm(Addr, Base, Offset);
SDValue Lo = Node->getOperand(1);
SDValue Hi = Node->getOperand(2);
- SDValue RegPair;
- // Peephole to use X0_Pair for storing zero.
- if (isNullConstant(Lo) && isNullConstant(Hi)) {
- RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
- } else {
- SDValue Ops[] = {
- CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
- CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
- CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
+ MachineSDNode *New;
+ if (MemNode->isVolatile()) {
+ SDValue RegPair;
+ // Peephole to use X0_Pair for storing zero.
+ if (isNullConstant(Lo) && isNullConstant(Hi)) {
+ RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
+ } else {
+ SDValue Ops[] = {
+ CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
+ Lo, CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
+ Hi, CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
+
+ RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
+ MVT::Untyped, Ops),
+ 0);
+ }
- RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
- MVT::Untyped, Ops),
- 0);
+ New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
+ {RegPair, Base, Offset, Chain});
+ } else {
+ New = CurDAG->getMachineNode(RISCV::PseudoSD_RV32_OPT, DL, MVT::Other,
+ {Lo, Hi, Base, Offset, Chain});
}
- MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
- {RegPair, Base, Offset, Chain});
- CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
+ CurDAG->setNodeMemRefs(New, {MemNode->getMemOperand()});
+
ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
CurDAG->RemoveDeadNode(Node);
return;
diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
index a22ab6bfc04b8..4842d4e29fb7d 100644
--- a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
@@ -509,7 +509,8 @@ void RISCVLoadStoreOpt::splitLdSdIntoTwo(MachineBasicBlock &MBB,
FirstReg != SecondReg &&
"First register and second register is impossible to be same register");
MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
- .addReg(FirstReg, getKillRegState(FirstOp.isKill()))
+ .addReg(FirstReg,
+ getKillRegState(FirstOp.isKill() && FirstReg != BaseReg))
.addReg(BaseReg);
MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
diff --git a/llvm/test/CodeGen/RISCV/zilsd.ll b/llvm/test/CodeGen/RISCV/zilsd.ll
index 27b1ff76f6f05..75c1682b0b662 100644
--- a/llvm/test/CodeGen/RISCV/zilsd.ll
+++ b/llvm/test/CodeGen/RISCV/zilsd.ll
@@ -9,9 +9,10 @@
define i64 @load(ptr %a) nounwind {
; CHECK-LABEL: load:
; CHECK: # %bb.0:
-; CHECK-NEXT: mv a2, a0
-; CHECK-NEXT: ld a0, 80(a0)
-; CHECK-NEXT: ld zero, 0(a2)
+; CHECK-NEXT: lw a2, 80(a0)
+; CHECK-NEXT: lw a1, 84(a0)
+; CHECK-NEXT: ld zero, 0(a0)
+; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: ret
%1 = getelementptr i64, ptr %a, i32 10
%2 = load i64, ptr %1
@@ -44,10 +45,8 @@ define i64 @load_align4(ptr %a) nounwind {
define void @store(ptr %a, i64 %b) nounwind {
; CHECK-LABEL: store:
; CHECK: # %bb.0:
-; CHECK-NEXT: mv a3, a2
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: sd a2, 0(a0)
-; CHECK-NEXT: sd a2, 88(a0)
+; CHECK-NEXT: sw a1, 0(a1)
+; CHECK-NEXT: sw a2, 4(a1)
; CHECK-NEXT: ret
store i64 %b, ptr %a
%1 = getelementptr i64, ptr %a, i32 11
@@ -64,16 +63,14 @@ define void @store_align4(ptr %a, i64 %b) nounwind {
;
; FAST-LABEL: store_align4:
; FAST: # %bb.0:
-; FAST-NEXT: mv a3, a2
-; FAST-NEXT: mv a2, a1
-; FAST-NEXT: sd a2, 88(a0)
+; FAST-NEXT: sw a1, 0(a1)
+; FAST-NEXT: sw a2, 4(a1)
; FAST-NEXT: ret
;
; 4BYTEALIGN-LABEL: store_align4:
; 4BYTEALIGN: # %bb.0:
-; 4BYTEALIGN-NEXT: mv a3, a2
-; 4BYTEALIGN-NEXT: mv a2, a1
-; 4BYTEALIGN-NEXT: sd a2, 88(a0)
+; 4BYTEALIGN-NEXT: sw a1, 0(a1)
+; 4BYTEALIGN-NEXT: sw a2, 4(a1)
; 4BYTEALIGN-NEXT: ret
%1 = getelementptr i64, ptr %a, i32 11
store i64 %b, ptr %1, align 4
@@ -158,9 +155,8 @@ define void @store_unaligned(ptr %p, i64 %v) {
;
; FAST-LABEL: store_unaligned:
; FAST: # %bb.0:
-; FAST-NEXT: mv a3, a2
-; FAST-NEXT: mv a2, a1
-; FAST-NEXT: sd a2, 0(a0)
+; FAST-NEXT: sw a1, 0(a1)
+; FAST-NEXT: sw a2, 4(a1)
; FAST-NEXT: ret
;
; 4BYTEALIGN-LABEL: store_unaligned:
@@ -200,8 +196,7 @@ entry:
define void @store_g() nounwind {
; CHECK-LABEL: store_g:
; CHECK: # %bb.0: # %entyr
-; CHECK-NEXT: lui a0, %hi(g)
-; CHECK-NEXT: sd zero, %lo(g)(a0)
+; CHECK-NEXT: sd zero, 0(zero)
; CHECK-NEXT: ret
entyr:
store i64 0, ptr @g
@@ -213,11 +208,12 @@ define void @large_offset(ptr nocapture %p, i64 %d) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a1, 4
; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ld a2, -384(a0)
-; CHECK-NEXT: addi a2, a2, 1
-; CHECK-NEXT: seqz a1, a2
-; CHECK-NEXT: add a3, a3, a1
-; CHECK-NEXT: sd a2, -384(a0)
+; CHECK-NEXT: ld a0, -384(a0)
+; CHECK-NEXT: addi a2, a0, 1
+; CHECK-NEXT: seqz a3, a2
+; CHECK-NEXT: add a1, a1, a3
+; CHECK-NEXT: sw a2, 1(a0)
+; CHECK-NEXT: sw a1, 5(a0)
; CHECK-NEXT: ret
entry:
%add.ptr = getelementptr inbounds i64, ptr %p, i64 2000
>From 181be8def764545f6e4b59abf0355c21e12f01a4 Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Tue, 25 Nov 2025 15:07:07 -0800
Subject: [PATCH 02/16] Fix Operand Issue
---
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 2 +-
llvm/test/CodeGen/RISCV/zilsd.ll | 48 +++++++++------------
2 files changed, 21 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index f00ec4e57e46b..0be5d8e731b60 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1856,7 +1856,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDValue Base, Offset;
SDValue Chain = MemNode->getChain();
- SDValue Addr = MemNode->getBasePtr();
+ SDValue Addr = Node->getOperand(3);
SelectAddrRegImm(Addr, Base, Offset);
SDValue Lo = Node->getOperand(1);
diff --git a/llvm/test/CodeGen/RISCV/zilsd.ll b/llvm/test/CodeGen/RISCV/zilsd.ll
index 75c1682b0b662..4146535318fb8 100644
--- a/llvm/test/CodeGen/RISCV/zilsd.ll
+++ b/llvm/test/CodeGen/RISCV/zilsd.ll
@@ -45,8 +45,10 @@ define i64 @load_align4(ptr %a) nounwind {
define void @store(ptr %a, i64 %b) nounwind {
; CHECK-LABEL: store:
; CHECK: # %bb.0:
-; CHECK-NEXT: sw a1, 0(a1)
-; CHECK-NEXT: sw a2, 4(a1)
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: sw a2, 4(a0)
+; CHECK-NEXT: sw a1, 88(a0)
+; CHECK-NEXT: sw a2, 92(a0)
; CHECK-NEXT: ret
store i64 %b, ptr %a
%1 = getelementptr i64, ptr %a, i32 11
@@ -55,23 +57,11 @@ define void @store(ptr %a, i64 %b) nounwind {
}
define void @store_align4(ptr %a, i64 %b) nounwind {
-; SLOW-LABEL: store_align4:
-; SLOW: # %bb.0:
-; SLOW-NEXT: sw a1, 88(a0)
-; SLOW-NEXT: sw a2, 92(a0)
-; SLOW-NEXT: ret
-;
-; FAST-LABEL: store_align4:
-; FAST: # %bb.0:
-; FAST-NEXT: sw a1, 0(a1)
-; FAST-NEXT: sw a2, 4(a1)
-; FAST-NEXT: ret
-;
-; 4BYTEALIGN-LABEL: store_align4:
-; 4BYTEALIGN: # %bb.0:
-; 4BYTEALIGN-NEXT: sw a1, 0(a1)
-; 4BYTEALIGN-NEXT: sw a2, 4(a1)
-; 4BYTEALIGN-NEXT: ret
+; CHECK-LABEL: store_align4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sw a1, 88(a0)
+; CHECK-NEXT: sw a2, 92(a0)
+; CHECK-NEXT: ret
%1 = getelementptr i64, ptr %a, i32 11
store i64 %b, ptr %1, align 4
ret void
@@ -155,8 +145,8 @@ define void @store_unaligned(ptr %p, i64 %v) {
;
; FAST-LABEL: store_unaligned:
; FAST: # %bb.0:
-; FAST-NEXT: sw a1, 0(a1)
-; FAST-NEXT: sw a2, 4(a1)
+; FAST-NEXT: sw a1, 0(a0)
+; FAST-NEXT: sw a2, 4(a0)
; FAST-NEXT: ret
;
; 4BYTEALIGN-LABEL: store_unaligned:
@@ -196,7 +186,8 @@ entry:
define void @store_g() nounwind {
; CHECK-LABEL: store_g:
; CHECK: # %bb.0: # %entyr
-; CHECK-NEXT: sd zero, 0(zero)
+; CHECK-NEXT: lui a0, %hi(g)
+; CHECK-NEXT: sd zero, %lo(g)(a0)
; CHECK-NEXT: ret
entyr:
store i64 0, ptr @g
@@ -208,12 +199,13 @@ define void @large_offset(ptr nocapture %p, i64 %d) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a1, 4
; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ld a0, -384(a0)
-; CHECK-NEXT: addi a2, a0, 1
-; CHECK-NEXT: seqz a3, a2
-; CHECK-NEXT: add a1, a1, a3
-; CHECK-NEXT: sw a2, 1(a0)
-; CHECK-NEXT: sw a1, 5(a0)
+; CHECK-NEXT: lw a1, -384(a0)
+; CHECK-NEXT: lw a2, -380(a0)
+; CHECK-NEXT: addi a1, a1, 1
+; CHECK-NEXT: seqz a3, a1
+; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: sw a1, -384(a0)
+; CHECK-NEXT: sw a2, -380(a0)
; CHECK-NEXT: ret
entry:
%add.ptr = getelementptr inbounds i64, ptr %p, i64 2000
>From e65c594b4a8fd1a005d1fa664484891b70987762 Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Tue, 25 Nov 2025 15:09:16 -0800
Subject: [PATCH 03/16] Fix to kill flags not needed
---
llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
index 4842d4e29fb7d..a22ab6bfc04b8 100644
--- a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
@@ -509,8 +509,7 @@ void RISCVLoadStoreOpt::splitLdSdIntoTwo(MachineBasicBlock &MBB,
FirstReg != SecondReg &&
"First register and second register is impossible to be same register");
MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
- .addReg(FirstReg,
- getKillRegState(FirstOp.isKill() && FirstReg != BaseReg))
+ .addReg(FirstReg, getKillRegState(FirstOp.isKill()))
.addReg(BaseReg);
MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
>From 712614db6243cac846162a7a65c1e515a4bd9844 Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Wed, 26 Nov 2025 14:07:59 -0800
Subject: [PATCH 04/16] Add reg alloc hints
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 21 ++++++++++++++++++++
llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td | 18 +++++++++++------
llvm/test/CodeGen/RISCV/zilsd.ll | 12 +++++------
3 files changed, 38 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3b250d7d9ad1f..6b5557383596f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -23166,8 +23166,29 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
}
+static void addPairRegisterHints(MachineInstr &MI) {
+ assert((MI.getOpcode() == RISCV::PseudoLD_RV32_OPT ||
+ MI.getOpcode() == RISCV::PseudoSD_RV32_OPT) &&
+ "Needs LD/SD Pseudo");
+
+ Register FirstReg = MI.getOperand(0).getReg();
+ Register SecondReg = MI.getOperand(1).getReg();
+
+ MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+
+ if (FirstReg.isVirtual() && SecondReg.isVirtual()) {
+ MRI.setRegAllocationHint(FirstReg, RISCVRI::RegPairEven, SecondReg);
+ MRI.setRegAllocationHint(SecondReg, RISCVRI::RegPairOdd, FirstReg);
+ }
+}
+
void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
+
+ if (MI.getOpcode() == RISCV::PseudoLD_RV32_OPT ||
+ MI.getOpcode() == RISCV::PseudoSD_RV32_OPT)
+ return addPairRegisterHints(MI);
+
// If instruction defines FRM operand, conservatively set it as non-dead to
// express data dependency with FRM users and prevent incorrect instruction
// reordering.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
index 4fc859f2547c1..e579ceebd18b7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
@@ -51,17 +51,23 @@ def PseudoSD_RV32 : PseudoStore<"sd", GPRPairRV32>;
def PseudoLD_RV32_OPT :
Pseudo<(outs GPR:$rd1, GPR:$rd2),
(ins GPR:$rs1, simm12_lo:$imm12), [], "", ""> {
- let hasSideEffects = 0;
- let mayLoad = 1;
- let mayStore = 0;
+ let hasSideEffects = false;
+ let mayLoad = true;
+ let mayStore = false;
+ let isCodeGenOnly = true;
+ let Size = 8; // Might become two LWs
+ let hasPostISelHook = true;
}
def PseudoSD_RV32_OPT :
Pseudo<(outs),
(ins GPR:$rs1, GPR:$rs2, GPR:$rs3, simm12_lo:$imm12), [], "", ""> {
- let hasSideEffects = 0;
- let mayLoad = 0;
- let mayStore = 1;
+ let hasSideEffects = false;
+ let mayLoad = false;
+ let mayStore = true;
+ let isCodeGenOnly = true;
+ let Size = 8; // Might become two SWs
+ let hasPostISelHook = true;
}
def : InstAlias<"ld $rd, (${rs1})", (LD_RV32 GPRPairRV32:$rd, GPR:$rs1, 0), 0>;
diff --git a/llvm/test/CodeGen/RISCV/zilsd.ll b/llvm/test/CodeGen/RISCV/zilsd.ll
index 4146535318fb8..7ff969414cce2 100644
--- a/llvm/test/CodeGen/RISCV/zilsd.ll
+++ b/llvm/test/CodeGen/RISCV/zilsd.ll
@@ -199,13 +199,11 @@ define void @large_offset(ptr nocapture %p, i64 %d) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a1, 4
; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: lw a1, -384(a0)
-; CHECK-NEXT: lw a2, -380(a0)
-; CHECK-NEXT: addi a1, a1, 1
-; CHECK-NEXT: seqz a3, a1
-; CHECK-NEXT: add a2, a2, a3
-; CHECK-NEXT: sw a1, -384(a0)
-; CHECK-NEXT: sw a2, -380(a0)
+; CHECK-NEXT: ld a2, -384(a0)
+; CHECK-NEXT: addi a2, a2, 1
+; CHECK-NEXT: seqz a1, a2
+; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: sd a2, -384(a0)
; CHECK-NEXT: ret
entry:
%add.ptr = getelementptr inbounds i64, ptr %p, i64 2000
>From 1f2eafbcf9ef82481a08df88a8214830b743867d Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Wed, 26 Nov 2025 14:08:16 -0800
Subject: [PATCH 05/16] eliminateFrameIndex (untested)
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index a5aef4bea46ab..ca8343d4bcd4e 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -547,9 +547,11 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// MIPS Prefetch instructions require the offset to be 9 bits encoded.
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
} else if ((Opc == RISCV::PseudoRV32ZdinxLD ||
- Opc == RISCV::PseudoRV32ZdinxSD) &&
+ Opc == RISCV::PseudoRV32ZdinxSD ||
+ Opc == RISCV::PseudoLD_RV32_OPT ||
+ Opc == RISCV::PseudoSD_RV32_OPT) &&
Lo12 >= 2044) {
- // This instruction will be split into 2 instructions. The second
+ // This instruction will/may be split into 2 instructions. The second
// instruction will add 4 to the immediate. If that would overflow 12
// bits, we can't fold the offset.
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
>From 461bf0ff8136d63dce7360042800b16288d5bcab Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Wed, 26 Nov 2025 14:23:48 -0800
Subject: [PATCH 06/16] Stack test, but doesn't test eliminateFrameIndex
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 6 ++----
llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 2 +-
llvm/test/CodeGen/RISCV/zilsd.ll | 23 +++++++++++++++++++++
3 files changed, 26 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index ca8343d4bcd4e..a5aef4bea46ab 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -547,11 +547,9 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// MIPS Prefetch instructions require the offset to be 9 bits encoded.
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
} else if ((Opc == RISCV::PseudoRV32ZdinxLD ||
- Opc == RISCV::PseudoRV32ZdinxSD ||
- Opc == RISCV::PseudoLD_RV32_OPT ||
- Opc == RISCV::PseudoSD_RV32_OPT) &&
+ Opc == RISCV::PseudoRV32ZdinxSD) &&
Lo12 >= 2044) {
- // This instruction will/may be split into 2 instructions. The second
+ // This instruction will be split into 2 instructions. The second
// instruction will add 4 to the immediate. If that would overflow 12
// bits, we can't fold the offset.
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 11b7a0a3c691a..d53dfbe8e4761 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -368,7 +368,7 @@ let RegAltNameIndices = [ABIRegAltName] in {
}
}
-let RegInfos = XLenPairRI, CopyCost = 2 in {
+let RegInfos = XLenPairRI, CopyCost = 2, AllocationPriority = 1 in {
def GPRPair : RISCVRegisterClass<[XLenPairVT, XLenPairFVT], 64, (add
X10_X11, X12_X13, X14_X15, X16_X17,
X6_X7,
diff --git a/llvm/test/CodeGen/RISCV/zilsd.ll b/llvm/test/CodeGen/RISCV/zilsd.ll
index 7ff969414cce2..bd0c81b35e9b5 100644
--- a/llvm/test/CodeGen/RISCV/zilsd.ll
+++ b/llvm/test/CodeGen/RISCV/zilsd.ll
@@ -212,3 +212,26 @@ entry:
store i64 %b, ptr %add.ptr, align 8
ret void
}
+
+define i64 @stack_access(ptr nocapture %p) nounwind {
+; CHECK-LABEL: stack_access:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: lw a2, 8(sp)
+; CHECK-NEXT: lw a1, 12(sp)
+; CHECK-NEXT: ld a4, 0(a0)
+; CHECK-NEXT: sw a2, 0(a0)
+; CHECK-NEXT: sw a1, 4(a0)
+; CHECK-NEXT: sd a4, 8(sp)
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+entry:
+ %stack = alloca i64, align 8
+ %a = load i64, ptr %stack, align 8
+ %b = load i64, ptr %p, align 8
+ store i64 %a, ptr %p, align 8
+ store i64 %b, ptr %stack, align 8
+ %c = load i64, ptr %p, align 8
+ ret i64 %c
+}
>From 5605557607e0f30dca422d18d3db785580dfd97a Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Mon, 1 Dec 2025 08:48:40 -0800
Subject: [PATCH 07/16] Remove isCodegenOnly
---
llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
index e579ceebd18b7..e34c2e37d45bc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td
@@ -54,7 +54,6 @@ def PseudoLD_RV32_OPT :
let hasSideEffects = false;
let mayLoad = true;
let mayStore = false;
- let isCodeGenOnly = true;
let Size = 8; // Might become two LWs
let hasPostISelHook = true;
}
@@ -65,7 +64,6 @@ def PseudoSD_RV32_OPT :
let hasSideEffects = false;
let mayLoad = false;
let mayStore = true;
- let isCodeGenOnly = true;
let Size = 8; // Might become two SWs
let hasPostISelHook = true;
}
>From d3be266eaaf0f8060fc957028a7069a4a71712c2 Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Mon, 1 Dec 2025 08:57:00 -0800
Subject: [PATCH 08/16] Remove change to allocpriority
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index d53dfbe8e4761..11b7a0a3c691a 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -368,7 +368,7 @@ let RegAltNameIndices = [ABIRegAltName] in {
}
}
-let RegInfos = XLenPairRI, CopyCost = 2, AllocationPriority = 1 in {
+let RegInfos = XLenPairRI, CopyCost = 2 in {
def GPRPair : RISCVRegisterClass<[XLenPairVT, XLenPairFVT], 64, (add
X10_X11, X12_X13, X14_X15, X16_X17,
X6_X7,
>From 3437eda8569d20917429a252efeb995008286508 Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Mon, 1 Dec 2025 13:01:21 -0800
Subject: [PATCH 09/16] Pre-commit large spills test
---
llvm/test/CodeGen/RISCV/zilsd-large-spill.mir | 72 +++++++++++++++++++
1 file changed, 72 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/zilsd-large-spill.mir
diff --git a/llvm/test/CodeGen/RISCV/zilsd-large-spill.mir b/llvm/test/CodeGen/RISCV/zilsd-large-spill.mir
new file mode 100644
index 0000000000000..70305e81788d6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/zilsd-large-spill.mir
@@ -0,0 +1,72 @@
+# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+# RUN: llc %s -mtriple=riscv32 -mattr=+zilsd -start-before=prologepilog -o - | FileCheck %s
+
+# We want to make sure eliminateFrameIndex doesn't fold sp+2044 as an offset in
+# a GPR pair spill/reload instruction. When we split the pair spill, we would be
+# unable to add 4 to the immediate without overflowing simm12.
+
+--- |
+ define void @foo() {
+ ; CHECK-LABEL: foo:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: addi sp, sp, -2048
+ ; CHECK-NEXT: addi sp, sp, -32
+ ; CHECK-NEXT: .cfi_def_cfa_offset 2080
+ ; CHECK-NEXT: lui t0, 1
+ ; CHECK-NEXT: add t0, sp, t0
+ ; CHECK-NEXT: sd a0, -2024(t0) # 8-byte Folded Spill
+ ; CHECK-NEXT: lui a0, 1
+ ; CHECK-NEXT: add a0, sp, a0
+ ; CHECK-NEXT: sd a2, -2032(a0) # 8-byte Folded Spill
+ ; CHECK-NEXT: lui a0, 1
+ ; CHECK-NEXT: add a0, sp, a0
+ ; CHECK-NEXT: sd a4, -2040(a0) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd a6, 2044(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: lui a0, 1
+ ; CHECK-NEXT: add a0, sp, a0
+ ; CHECK-NEXT: ld a0, -2024(a0) # 8-byte Folded Reload
+ ; CHECK-NEXT: lui a0, 1
+ ; CHECK-NEXT: add a0, sp, a0
+ ; CHECK-NEXT: ld a2, -2032(a0) # 8-byte Folded Reload
+ ; CHECK-NEXT: lui a0, 1
+ ; CHECK-NEXT: add a0, sp, a0
+ ; CHECK-NEXT: ld a4, -2040(a0) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld a6, 2044(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: addi sp, sp, 2032
+ ; CHECK-NEXT: addi sp, sp, 48
+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
+ ; CHECK-NEXT: ret
+ unreachable
+ }
+...
+---
+name: foo
+tracksRegLiveness: true
+tracksDebugUserValues: true
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+ - { id: 1, type: spill-slot, size: 8, alignment: 4 }
+ - { id: 2, type: spill-slot, size: 8, alignment: 4 }
+ - { id: 3, type: spill-slot, size: 4, alignment: 4 }
+ - { id: 4, type: spill-slot, size: 8, alignment: 4 }
+ - { id: 5, type: spill-slot, size: 2028, alignment: 4 }
+machineFunctionInfo:
+ varArgsFrameIndex: 0
+ varArgsSaveSize: 0
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+
+ PseudoSD_RV32_OPT killed renamable $x10, killed renamable $x11, %stack.0, 0 :: (store (s64) into %stack.0, align 4)
+ PseudoSD_RV32_OPT killed renamable $x12, killed renamable $x13, %stack.1, 0 :: (store (s64) into %stack.1, align 4)
+ PseudoSD_RV32_OPT killed renamable $x14, killed renamable $x15, %stack.2, 0 :: (store (s64) into %stack.2, align 4)
+ PseudoSD_RV32_OPT killed renamable $x16, killed renamable $x17, %stack.4, 0 :: (store (s64) into %stack.4, align 4)
+ renamable $x10, renamable $x11 = PseudoLD_RV32_OPT %stack.0, 0 :: (load (s64) from %stack.0, align 4)
+ renamable $x12, renamable $x13 = PseudoLD_RV32_OPT %stack.1, 0 :: (load (s64) from %stack.1, align 4)
+ renamable $x14, renamable $x15 = PseudoLD_RV32_OPT %stack.2, 0 :: (load (s64) from %stack.2, align 4)
+ renamable $x16, renamable $x17 = PseudoLD_RV32_OPT %stack.4, 0 :: (load (s64) from %stack.4, align 4)
+ PseudoRET
+
+...
>From 01ff573688d19099f37605d52e7e891e745292e3 Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Mon, 1 Dec 2025 13:02:16 -0800
Subject: [PATCH 10/16] Large spills fix
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 4 +++-
llvm/test/CodeGen/RISCV/zilsd-large-spill.mir | 6 ++++--
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index a5aef4bea46ab..8b714888fb754 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -547,7 +547,9 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// MIPS Prefetch instructions require the offset to be 9 bits encoded.
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
} else if ((Opc == RISCV::PseudoRV32ZdinxLD ||
- Opc == RISCV::PseudoRV32ZdinxSD) &&
+ Opc == RISCV::PseudoRV32ZdinxSD ||
+ Opc == RISCV::PseudoLD_RV32_OPT ||
+ Opc == RISCV::PseudoSD_RV32_OPT) &&
Lo12 >= 2044) {
// This instruction will be split into 2 instructions. The second
// instruction will add 4 to the immediate. If that would overflow 12
diff --git a/llvm/test/CodeGen/RISCV/zilsd-large-spill.mir b/llvm/test/CodeGen/RISCV/zilsd-large-spill.mir
index 70305e81788d6..0d1fe316cbc10 100644
--- a/llvm/test/CodeGen/RISCV/zilsd-large-spill.mir
+++ b/llvm/test/CodeGen/RISCV/zilsd-large-spill.mir
@@ -21,7 +21,8 @@
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: sd a4, -2040(a0) # 8-byte Folded Spill
- ; CHECK-NEXT: sd a6, 2044(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: addi a0, sp, 2044
+ ; CHECK-NEXT: sd a6, 0(a0) # 8-byte Folded Spill
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: ld a0, -2024(a0) # 8-byte Folded Reload
@@ -31,7 +32,8 @@
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: ld a4, -2040(a0) # 8-byte Folded Reload
- ; CHECK-NEXT: ld a6, 2044(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: addi a0, sp, 2044
+ ; CHECK-NEXT: ld a6, 0(a0) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 2032
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: .cfi_def_cfa_offset 0
>From 79aadd89bb2ab0254b2f1346b87582a702ddbedf Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Mon, 1 Dec 2025 13:02:32 -0800
Subject: [PATCH 11/16] Remove useless testcase
---
llvm/test/CodeGen/RISCV/zilsd.ll | 23 -----------------------
1 file changed, 23 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/zilsd.ll b/llvm/test/CodeGen/RISCV/zilsd.ll
index bd0c81b35e9b5..7ff969414cce2 100644
--- a/llvm/test/CodeGen/RISCV/zilsd.ll
+++ b/llvm/test/CodeGen/RISCV/zilsd.ll
@@ -212,26 +212,3 @@ entry:
store i64 %b, ptr %add.ptr, align 8
ret void
}
-
-define i64 @stack_access(ptr nocapture %p) nounwind {
-; CHECK-LABEL: stack_access:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: lw a2, 8(sp)
-; CHECK-NEXT: lw a1, 12(sp)
-; CHECK-NEXT: ld a4, 0(a0)
-; CHECK-NEXT: sw a2, 0(a0)
-; CHECK-NEXT: sw a1, 4(a0)
-; CHECK-NEXT: sd a4, 8(sp)
-; CHECK-NEXT: mv a0, a2
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: ret
-entry:
- %stack = alloca i64, align 8
- %a = load i64, ptr %stack, align 8
- %b = load i64, ptr %p, align 8
- store i64 %a, ptr %p, align 8
- store i64 %b, ptr %stack, align 8
- %c = load i64, ptr %p, align 8
- ret i64 %c
-}
>From d955e3a0fba6bb5f9c0fb19fdee7b745fa2e70e2 Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Sat, 28 Mar 2026 11:51:45 -0700
Subject: [PATCH 12/16] update tests
---
.../CodeGen/RISCV/fold-mem-offset-zilsd.ll | 56 ++++---
llvm/test/CodeGen/RISCV/zilsd-spill.ll | 153 +++++++++---------
2 files changed, 102 insertions(+), 107 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/fold-mem-offset-zilsd.ll b/llvm/test/CodeGen/RISCV/fold-mem-offset-zilsd.ll
index 040bee5803026..519e458145837 100644
--- a/llvm/test/CodeGen/RISCV/fold-mem-offset-zilsd.ll
+++ b/llvm/test/CodeGen/RISCV/fold-mem-offset-zilsd.ll
@@ -5,12 +5,13 @@
define i64 @test_sh3add_load(ptr %p, i32 %x, i32 %y) {
; RV32ZILSD-LABEL: test_sh3add_load:
; RV32ZILSD: # %bb.0: # %entry
+; RV32ZILSD-NEXT: addi a0, a0, 400
; RV32ZILSD-NEXT: slli a1, a1, 3
; RV32ZILSD-NEXT: slli a2, a2, 3
; RV32ZILSD-NEXT: add a1, a1, a0
; RV32ZILSD-NEXT: add a0, a0, a2
-; RV32ZILSD-NEXT: ld a2, 480(a1)
-; RV32ZILSD-NEXT: ld a4, 400(a0)
+; RV32ZILSD-NEXT: ld a2, 80(a1)
+; RV32ZILSD-NEXT: ld a4, 0(a0)
; RV32ZILSD-NEXT: add a1, a5, a3
; RV32ZILSD-NEXT: add a0, a4, a2
; RV32ZILSD-NEXT: sltu a2, a0, a4
@@ -19,10 +20,11 @@ define i64 @test_sh3add_load(ptr %p, i32 %x, i32 %y) {
;
; RV32ZILSDZBA-LABEL: test_sh3add_load:
; RV32ZILSDZBA: # %bb.0: # %entry
+; RV32ZILSDZBA-NEXT: addi a0, a0, 400
; RV32ZILSDZBA-NEXT: sh3add a1, a1, a0
; RV32ZILSDZBA-NEXT: sh3add a0, a2, a0
-; RV32ZILSDZBA-NEXT: ld a2, 480(a1)
-; RV32ZILSDZBA-NEXT: ld a4, 400(a0)
+; RV32ZILSDZBA-NEXT: ld a2, 80(a1)
+; RV32ZILSDZBA-NEXT: ld a4, 0(a0)
; RV32ZILSDZBA-NEXT: add a1, a5, a3
; RV32ZILSDZBA-NEXT: add a0, a4, a2
; RV32ZILSDZBA-NEXT: sltu a2, a0, a4
@@ -82,27 +84,29 @@ define void @test_sh3add_store(ptr %p, i64 %x, i64 %y) {
; RV32ZILSD-LABEL: test_sh3add_store:
; RV32ZILSD: # %bb.0: # %entry
; RV32ZILSD-NEXT: mv a5, a4
-; RV32ZILSD-NEXT: mv a7, a2
; RV32ZILSD-NEXT: mv a4, a3
-; RV32ZILSD-NEXT: mv a6, a1
+; RV32ZILSD-NEXT: mv a3, a2
+; RV32ZILSD-NEXT: mv a2, a1
+; RV32ZILSD-NEXT: addi a0, a0, 400
; RV32ZILSD-NEXT: slli a1, a1, 3
-; RV32ZILSD-NEXT: slli a2, a3, 3
+; RV32ZILSD-NEXT: slli a6, a4, 3
; RV32ZILSD-NEXT: add a1, a0, a1
-; RV32ZILSD-NEXT: add a0, a0, a2
-; RV32ZILSD-NEXT: sd a6, 400(a1)
-; RV32ZILSD-NEXT: sd a4, 400(a0)
+; RV32ZILSD-NEXT: add a0, a0, a6
+; RV32ZILSD-NEXT: sd a2, 0(a1)
+; RV32ZILSD-NEXT: sd a4, 0(a0)
; RV32ZILSD-NEXT: ret
;
; RV32ZILSDZBA-LABEL: test_sh3add_store:
; RV32ZILSDZBA: # %bb.0: # %entry
; RV32ZILSDZBA-NEXT: mv a5, a4
-; RV32ZILSDZBA-NEXT: mv a7, a2
; RV32ZILSDZBA-NEXT: mv a4, a3
-; RV32ZILSDZBA-NEXT: mv a6, a1
+; RV32ZILSDZBA-NEXT: mv a3, a2
+; RV32ZILSDZBA-NEXT: mv a2, a1
+; RV32ZILSDZBA-NEXT: addi a0, a0, 400
; RV32ZILSDZBA-NEXT: sh3add a1, a1, a0
-; RV32ZILSDZBA-NEXT: sh3add a0, a3, a0
-; RV32ZILSDZBA-NEXT: sd a6, 400(a1)
-; RV32ZILSDZBA-NEXT: sd a4, 400(a0)
+; RV32ZILSDZBA-NEXT: sh3add a0, a4, a0
+; RV32ZILSDZBA-NEXT: sd a2, 0(a1)
+; RV32ZILSDZBA-NEXT: sd a4, 0(a0)
; RV32ZILSDZBA-NEXT: ret
entry:
%b = getelementptr inbounds nuw i8, ptr %p, i64 400
@@ -117,28 +121,28 @@ define void @test_sh3add_store_optsize(ptr %p, i64 %x, i64 %y) optsize {
; RV32ZILSD-LABEL: test_sh3add_store_optsize:
; RV32ZILSD: # %bb.0: # %entry
; RV32ZILSD-NEXT: mv a5, a4
-; RV32ZILSD-NEXT: mv a7, a2
-; RV32ZILSD-NEXT: addi a0, a0, 400
; RV32ZILSD-NEXT: mv a4, a3
-; RV32ZILSD-NEXT: mv a6, a1
+; RV32ZILSD-NEXT: mv a3, a2
+; RV32ZILSD-NEXT: mv a2, a1
+; RV32ZILSD-NEXT: addi a0, a0, 400
; RV32ZILSD-NEXT: slli a1, a1, 3
-; RV32ZILSD-NEXT: slli a2, a3, 3
+; RV32ZILSD-NEXT: slli a6, a4, 3
; RV32ZILSD-NEXT: add a1, a0, a1
-; RV32ZILSD-NEXT: add a0, a0, a2
-; RV32ZILSD-NEXT: sd a6, 0(a1)
+; RV32ZILSD-NEXT: add a0, a0, a6
+; RV32ZILSD-NEXT: sd a2, 0(a1)
; RV32ZILSD-NEXT: sd a4, 0(a0)
; RV32ZILSD-NEXT: ret
;
; RV32ZILSDZBA-LABEL: test_sh3add_store_optsize:
; RV32ZILSDZBA: # %bb.0: # %entry
; RV32ZILSDZBA-NEXT: mv a5, a4
-; RV32ZILSDZBA-NEXT: mv a7, a2
-; RV32ZILSDZBA-NEXT: addi a0, a0, 400
; RV32ZILSDZBA-NEXT: mv a4, a3
-; RV32ZILSDZBA-NEXT: mv a6, a1
+; RV32ZILSDZBA-NEXT: mv a3, a2
+; RV32ZILSDZBA-NEXT: mv a2, a1
+; RV32ZILSDZBA-NEXT: addi a0, a0, 400
; RV32ZILSDZBA-NEXT: sh3add a1, a1, a0
-; RV32ZILSDZBA-NEXT: sh3add a0, a3, a0
-; RV32ZILSDZBA-NEXT: sd a6, 0(a1)
+; RV32ZILSDZBA-NEXT: sh3add a0, a4, a0
+; RV32ZILSDZBA-NEXT: sd a2, 0(a1)
; RV32ZILSDZBA-NEXT: sd a4, 0(a0)
; RV32ZILSDZBA-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/zilsd-spill.ll b/llvm/test/CodeGen/RISCV/zilsd-spill.ll
index 41eccedd287a7..c14e9ed3b4675 100644
--- a/llvm/test/CodeGen/RISCV/zilsd-spill.ll
+++ b/llvm/test/CodeGen/RISCV/zilsd-spill.ll
@@ -64,117 +64,108 @@ define i64 @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwi
; RV32I-ZILSD: bb.0 (%ir-block.0):
; RV32I-ZILSD-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x1, $x8_x9, $x18_x19, $x20_x21, $x22_x23, $x24_x25, $x26_x27
; RV32I-ZILSD-NEXT: {{ $}}
- ; RV32I-ZILSD-NEXT: $x2 = frame-setup ADDI $x2, -80
- ; RV32I-ZILSD-NEXT: frame-setup SW killed $x1, $x2, 76 :: (store (s32) into %stack.2)
- ; RV32I-ZILSD-NEXT: frame-setup SD_RV32 killed $x8_x9, $x2, 64 :: (store (s64) into %stack.3)
- ; RV32I-ZILSD-NEXT: frame-setup SD_RV32 killed $x18_x19, $x2, 56 :: (store (s64) into %stack.4)
- ; RV32I-ZILSD-NEXT: frame-setup SD_RV32 killed $x20_x21, $x2, 48 :: (store (s64) into %stack.5)
- ; RV32I-ZILSD-NEXT: frame-setup SD_RV32 killed $x22_x23, $x2, 40 :: (store (s64) into %stack.6)
- ; RV32I-ZILSD-NEXT: frame-setup SD_RV32 killed $x24_x25, $x2, 32 :: (store (s64) into %stack.7)
- ; RV32I-ZILSD-NEXT: frame-setup SD_RV32 killed $x26_x27, $x2, 24 :: (store (s64) into %stack.8)
- ; RV32I-ZILSD-NEXT: renamable $x17 = COPY $x12
- ; RV32I-ZILSD-NEXT: renamable $x16 = COPY $x11
- ; RV32I-ZILSD-NEXT: SD_RV32 killed renamable $x16_x17, $x2, 16 :: (store (s64) into %ir.1)
- ; RV32I-ZILSD-NEXT: $x11 = ADDI $x2, 16
+ ; RV32I-ZILSD-NEXT: $x2 = frame-setup ADDI $x2, -64
+ ; RV32I-ZILSD-NEXT: frame-setup SW killed $x1, $x2, 60 :: (store (s32) into %stack.1)
+ ; RV32I-ZILSD-NEXT: frame-setup SD_RV32 killed $x8_x9, $x2, 48 :: (store (s64) into %stack.2)
+ ; RV32I-ZILSD-NEXT: frame-setup SD_RV32 killed $x18_x19, $x2, 40 :: (store (s64) into %stack.3)
+ ; RV32I-ZILSD-NEXT: frame-setup SD_RV32 killed $x20_x21, $x2, 32 :: (store (s64) into %stack.4)
+ ; RV32I-ZILSD-NEXT: frame-setup SD_RV32 killed $x22_x23, $x2, 24 :: (store (s64) into %stack.5)
+ ; RV32I-ZILSD-NEXT: frame-setup SD_RV32 killed $x24_x25, $x2, 16 :: (store (s64) into %stack.6)
+ ; RV32I-ZILSD-NEXT: frame-setup SD_RV32 killed $x26_x27, $x2, 8 :: (store (s64) into %stack.7)
+ ; RV32I-ZILSD-NEXT: renamable $x15 = COPY $x12
+ ; RV32I-ZILSD-NEXT: PseudoSD_RV32_OPT $x11, killed renamable $x15, $x2, 0 :: (store (s64) into %ir.1)
+ ; RV32I-ZILSD-NEXT: $x11 = ADDI $x2, 0
; RV32I-ZILSD-NEXT: $x12 = COPY killed renamable $x13
; RV32I-ZILSD-NEXT: $x13 = COPY killed renamable $x14
; RV32I-ZILSD-NEXT: $x14 = COPY $x0
; RV32I-ZILSD-NEXT: $x15 = COPY $x0
; RV32I-ZILSD-NEXT: PseudoCALL target-flags(riscv-call) @__atomic_compare_exchange_8, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit-def $x2, implicit-def dead $x10
- ; RV32I-ZILSD-NEXT: renamable $x10_x11 = LD_RV32 $x2, 16 :: (dereferenceable load (s64) from %ir.1)
- ; RV32I-ZILSD-NEXT: PseudoRV32ZdinxSD killed renamable $x10_x11, $x2, 8 :: (store (s64) into %stack.1, align 4)
+ ; RV32I-ZILSD-NEXT: renamable $x30, renamable $x5 = PseudoLD_RV32_OPT $x2, 0 :: (dereferenceable load (s64) from %ir.1)
; RV32I-ZILSD-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $x6, 12 /* clobber */, implicit-def dead early-clobber $x7, 12 /* clobber */, implicit-def dead early-clobber $x8, 12 /* clobber */, implicit-def dead early-clobber $x9, 12 /* clobber */, implicit-def dead early-clobber $x10, 12 /* clobber */, implicit-def dead early-clobber $x11, 12 /* clobber */, implicit-def dead early-clobber $x12, 12 /* clobber */, implicit-def dead early-clobber $x13, 12 /* clobber */, implicit-def dead early-clobber $x14, 12 /* clobber */, implicit-def dead early-clobber $x15, 12 /* clobber */, implicit-def dead early-clobber $x16, 12 /* clobber */, implicit-def dead early-clobber $x17, 12 /* clobber */, implicit-def dead early-clobber $x18, 12 /* clobber */, implicit-def dead early-clobber $x19, 12 /* clobber */, implicit-def dead early-clobber $x20, 12 /* clobber */, implicit-def dead early-clobber $x21, 12 /* clobber */, implicit-def dead early-clobber $x22, 12 /* clobber */, implicit-def dead early-clobber $x23, 12 /* clobber */, implicit-def dead early-clobber $x24, 12 /* clobber */, implicit-def dead early-clobber $x25, 12 /* clobber */, implicit-def dead early-clobber $x26, 12 /* clobber */, implicit-def dead early-clobber $x27, 12 /* clobber */, implicit-def dead early-clobber $x28, 12 /* clobber */, implicit-def dead early-clobber $x29, 12 /* clobber */, implicit-def dead early-clobber $x31
- ; RV32I-ZILSD-NEXT: renamable $x12_x13 = PseudoRV32ZdinxLD $x2, 8 :: (load (s64) from %stack.1, align 4)
- ; RV32I-ZILSD-NEXT: renamable $x10 = ADD renamable $x12, renamable $x12
- ; RV32I-ZILSD-NEXT: renamable $x11 = SLTU renamable $x10, renamable $x12
- ; RV32I-ZILSD-NEXT: renamable $x12 = ADD killed renamable $x13, renamable $x13
- ; RV32I-ZILSD-NEXT: renamable $x11 = ADD killed renamable $x12, killed renamable $x11
- ; RV32I-ZILSD-NEXT: $x1 = frame-destroy LW $x2, 76 :: (load (s32) from %stack.2)
- ; RV32I-ZILSD-NEXT: $x8_x9 = frame-destroy LD_RV32 $x2, 64 :: (load (s64) from %stack.3)
- ; RV32I-ZILSD-NEXT: $x18_x19 = frame-destroy LD_RV32 $x2, 56 :: (load (s64) from %stack.4)
- ; RV32I-ZILSD-NEXT: $x20_x21 = frame-destroy LD_RV32 $x2, 48 :: (load (s64) from %stack.5)
- ; RV32I-ZILSD-NEXT: $x22_x23 = frame-destroy LD_RV32 $x2, 40 :: (load (s64) from %stack.6)
- ; RV32I-ZILSD-NEXT: $x24_x25 = frame-destroy LD_RV32 $x2, 32 :: (load (s64) from %stack.7)
- ; RV32I-ZILSD-NEXT: $x26_x27 = frame-destroy LD_RV32 $x2, 24 :: (load (s64) from %stack.8)
- ; RV32I-ZILSD-NEXT: $x2 = frame-destroy ADDI $x2, 80
+ ; RV32I-ZILSD-NEXT: renamable $x10 = ADD renamable $x30, renamable $x30
+ ; RV32I-ZILSD-NEXT: renamable $x11 = SLTU renamable $x10, killed renamable $x30
+ ; RV32I-ZILSD-NEXT: renamable $x5 = ADD killed renamable $x5, renamable $x5
+ ; RV32I-ZILSD-NEXT: renamable $x11 = ADD killed renamable $x5, killed renamable $x11
+ ; RV32I-ZILSD-NEXT: $x1 = frame-destroy LW $x2, 60 :: (load (s32) from %stack.1)
+ ; RV32I-ZILSD-NEXT: $x8_x9 = frame-destroy LD_RV32 $x2, 48 :: (load (s64) from %stack.2)
+ ; RV32I-ZILSD-NEXT: $x18_x19 = frame-destroy LD_RV32 $x2, 40 :: (load (s64) from %stack.3)
+ ; RV32I-ZILSD-NEXT: $x20_x21 = frame-destroy LD_RV32 $x2, 32 :: (load (s64) from %stack.4)
+ ; RV32I-ZILSD-NEXT: $x22_x23 = frame-destroy LD_RV32 $x2, 24 :: (load (s64) from %stack.5)
+ ; RV32I-ZILSD-NEXT: $x24_x25 = frame-destroy LD_RV32 $x2, 16 :: (load (s64) from %stack.6)
+ ; RV32I-ZILSD-NEXT: $x26_x27 = frame-destroy LD_RV32 $x2, 8 :: (load (s64) from %stack.7)
+ ; RV32I-ZILSD-NEXT: $x2 = frame-destroy ADDI $x2, 64
; RV32I-ZILSD-NEXT: PseudoRET implicit $x10, implicit $x11
;
; RV32I-ZILSD-UNALIGNED-LABEL: name: cmpxchg_i64_monotonic_monotonic
; RV32I-ZILSD-UNALIGNED: bb.0 (%ir-block.0):
; RV32I-ZILSD-UNALIGNED-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x1, $x8_x9, $x18_x19, $x20_x21, $x22_x23, $x24_x25, $x26_x27
; RV32I-ZILSD-UNALIGNED-NEXT: {{ $}}
- ; RV32I-ZILSD-UNALIGNED-NEXT: $x2 = frame-setup ADDI $x2, -80
- ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SW killed $x1, $x2, 76 :: (store (s32) into %stack.2)
- ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SD_RV32 killed $x8_x9, $x2, 68 :: (store (s64) into %stack.3, align 1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SD_RV32 killed $x18_x19, $x2, 60 :: (store (s64) into %stack.4, align 1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SD_RV32 killed $x20_x21, $x2, 52 :: (store (s64) into %stack.5, align 1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SD_RV32 killed $x22_x23, $x2, 44 :: (store (s64) into %stack.6, align 1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SD_RV32 killed $x24_x25, $x2, 36 :: (store (s64) into %stack.7, align 1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SD_RV32 killed $x26_x27, $x2, 28 :: (store (s64) into %stack.8, align 1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x17 = COPY $x12
- ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x16 = COPY $x11
- ; RV32I-ZILSD-UNALIGNED-NEXT: SD_RV32 killed renamable $x16_x17, $x2, 16 :: (store (s64) into %ir.1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: $x11 = ADDI $x2, 16
+ ; RV32I-ZILSD-UNALIGNED-NEXT: $x2 = frame-setup ADDI $x2, -64
+ ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SW killed $x1, $x2, 60 :: (store (s32) into %stack.1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SD_RV32 killed $x8_x9, $x2, 52 :: (store (s64) into %stack.2, align 1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SD_RV32 killed $x18_x19, $x2, 44 :: (store (s64) into %stack.3, align 1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SD_RV32 killed $x20_x21, $x2, 36 :: (store (s64) into %stack.4, align 1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SD_RV32 killed $x22_x23, $x2, 28 :: (store (s64) into %stack.5, align 1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SD_RV32 killed $x24_x25, $x2, 20 :: (store (s64) into %stack.6, align 1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: frame-setup SD_RV32 killed $x26_x27, $x2, 12 :: (store (s64) into %stack.7, align 1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x15 = COPY $x12
+ ; RV32I-ZILSD-UNALIGNED-NEXT: PseudoSD_RV32_OPT $x11, killed renamable $x15, $x2, 0 :: (store (s64) into %ir.1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: $x11 = ADDI $x2, 0
; RV32I-ZILSD-UNALIGNED-NEXT: $x12 = COPY killed renamable $x13
; RV32I-ZILSD-UNALIGNED-NEXT: $x13 = COPY killed renamable $x14
; RV32I-ZILSD-UNALIGNED-NEXT: $x14 = COPY $x0
; RV32I-ZILSD-UNALIGNED-NEXT: $x15 = COPY $x0
; RV32I-ZILSD-UNALIGNED-NEXT: PseudoCALL target-flags(riscv-call) @__atomic_compare_exchange_8, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit-def $x2, implicit-def dead $x10
- ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x10_x11 = LD_RV32 $x2, 16 :: (dereferenceable load (s64) from %ir.1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: SD_RV32 killed renamable $x10_x11, $x2, 8 :: (store (s64) into %stack.1, align 4)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x30, renamable $x5 = PseudoLD_RV32_OPT $x2, 0 :: (dereferenceable load (s64) from %ir.1)
; RV32I-ZILSD-UNALIGNED-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $x6, 12 /* clobber */, implicit-def dead early-clobber $x7, 12 /* clobber */, implicit-def dead early-clobber $x8, 12 /* clobber */, implicit-def dead early-clobber $x9, 12 /* clobber */, implicit-def dead early-clobber $x10, 12 /* clobber */, implicit-def dead early-clobber $x11, 12 /* clobber */, implicit-def dead early-clobber $x12, 12 /* clobber */, implicit-def dead early-clobber $x13, 12 /* clobber */, implicit-def dead early-clobber $x14, 12 /* clobber */, implicit-def dead early-clobber $x15, 12 /* clobber */, implicit-def dead early-clobber $x16, 12 /* clobber */, implicit-def dead early-clobber $x17, 12 /* clobber */, implicit-def dead early-clobber $x18, 12 /* clobber */, implicit-def dead early-clobber $x19, 12 /* clobber */, implicit-def dead early-clobber $x20, 12 /* clobber */, implicit-def dead early-clobber $x21, 12 /* clobber */, implicit-def dead early-clobber $x22, 12 /* clobber */, implicit-def dead early-clobber $x23, 12 /* clobber */, implicit-def dead early-clobber $x24, 12 /* clobber */, implicit-def dead early-clobber $x25, 12 /* clobber */, implicit-def dead early-clobber $x26, 12 /* clobber */, implicit-def dead early-clobber $x27, 12 /* clobber */, implicit-def dead early-clobber $x28, 12 /* clobber */, implicit-def dead early-clobber $x29, 12 /* clobber */, implicit-def dead early-clobber $x31
- ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x12_x13 = LD_RV32 $x2, 8 :: (load (s64) from %stack.1, align 4)
- ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x10 = ADD renamable $x12, renamable $x12
- ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x11 = SLTU renamable $x10, renamable $x12
- ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x12 = ADD killed renamable $x13, renamable $x13
- ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x11 = ADD killed renamable $x12, killed renamable $x11
- ; RV32I-ZILSD-UNALIGNED-NEXT: $x1 = frame-destroy LW $x2, 76 :: (load (s32) from %stack.2)
- ; RV32I-ZILSD-UNALIGNED-NEXT: $x8_x9 = frame-destroy LD_RV32 $x2, 68 :: (load (s64) from %stack.3, align 1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: $x18_x19 = frame-destroy LD_RV32 $x2, 60 :: (load (s64) from %stack.4, align 1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: $x20_x21 = frame-destroy LD_RV32 $x2, 52 :: (load (s64) from %stack.5, align 1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: $x22_x23 = frame-destroy LD_RV32 $x2, 44 :: (load (s64) from %stack.6, align 1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: $x24_x25 = frame-destroy LD_RV32 $x2, 36 :: (load (s64) from %stack.7, align 1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: $x26_x27 = frame-destroy LD_RV32 $x2, 28 :: (load (s64) from %stack.8, align 1)
- ; RV32I-ZILSD-UNALIGNED-NEXT: $x2 = frame-destroy ADDI $x2, 80
+ ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x10 = ADD renamable $x30, renamable $x30
+ ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x11 = SLTU renamable $x10, killed renamable $x30
+ ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x5 = ADD killed renamable $x5, renamable $x5
+ ; RV32I-ZILSD-UNALIGNED-NEXT: renamable $x11 = ADD killed renamable $x5, killed renamable $x11
+ ; RV32I-ZILSD-UNALIGNED-NEXT: $x1 = frame-destroy LW $x2, 60 :: (load (s32) from %stack.1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: $x8_x9 = frame-destroy LD_RV32 $x2, 52 :: (load (s64) from %stack.2, align 1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: $x18_x19 = frame-destroy LD_RV32 $x2, 44 :: (load (s64) from %stack.3, align 1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: $x20_x21 = frame-destroy LD_RV32 $x2, 36 :: (load (s64) from %stack.4, align 1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: $x22_x23 = frame-destroy LD_RV32 $x2, 28 :: (load (s64) from %stack.5, align 1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: $x24_x25 = frame-destroy LD_RV32 $x2, 20 :: (load (s64) from %stack.6, align 1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: $x26_x27 = frame-destroy LD_RV32 $x2, 12 :: (load (s64) from %stack.7, align 1)
+ ; RV32I-ZILSD-UNALIGNED-NEXT: $x2 = frame-destroy ADDI $x2, 64
; RV32I-ZILSD-UNALIGNED-NEXT: PseudoRET implicit $x10, implicit $x11
;
; RV32I-ZILSD-4BYTEALIGN-LABEL: name: cmpxchg_i64_monotonic_monotonic
; RV32I-ZILSD-4BYTEALIGN: bb.0 (%ir-block.0):
; RV32I-ZILSD-4BYTEALIGN-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x1, $x8_x9, $x18_x19, $x20_x21, $x22_x23, $x24_x25, $x26_x27
; RV32I-ZILSD-4BYTEALIGN-NEXT: {{ $}}
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x2 = frame-setup ADDI $x2, -80
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SW killed $x1, $x2, 76 :: (store (s32) into %stack.2)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SD_RV32 killed $x8_x9, $x2, 68 :: (store (s64) into %stack.3, align 4)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SD_RV32 killed $x18_x19, $x2, 60 :: (store (s64) into %stack.4, align 4)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SD_RV32 killed $x20_x21, $x2, 52 :: (store (s64) into %stack.5, align 4)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SD_RV32 killed $x22_x23, $x2, 44 :: (store (s64) into %stack.6, align 4)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SD_RV32 killed $x24_x25, $x2, 36 :: (store (s64) into %stack.7, align 4)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SD_RV32 killed $x26_x27, $x2, 28 :: (store (s64) into %stack.8, align 4)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x17 = COPY $x12
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x16 = COPY $x11
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: SD_RV32 killed renamable $x16_x17, $x2, 16 :: (store (s64) into %ir.1)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x11 = ADDI $x2, 16
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x2 = frame-setup ADDI $x2, -64
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SW killed $x1, $x2, 60 :: (store (s32) into %stack.1)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SD_RV32 killed $x8_x9, $x2, 52 :: (store (s64) into %stack.2, align 4)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SD_RV32 killed $x18_x19, $x2, 44 :: (store (s64) into %stack.3, align 4)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SD_RV32 killed $x20_x21, $x2, 36 :: (store (s64) into %stack.4, align 4)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SD_RV32 killed $x22_x23, $x2, 28 :: (store (s64) into %stack.5, align 4)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SD_RV32 killed $x24_x25, $x2, 20 :: (store (s64) into %stack.6, align 4)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: frame-setup SD_RV32 killed $x26_x27, $x2, 12 :: (store (s64) into %stack.7, align 4)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x15 = COPY $x12
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: PseudoSD_RV32_OPT $x11, killed renamable $x15, $x2, 0 :: (store (s64) into %ir.1)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x11 = ADDI $x2, 0
; RV32I-ZILSD-4BYTEALIGN-NEXT: $x12 = COPY killed renamable $x13
; RV32I-ZILSD-4BYTEALIGN-NEXT: $x13 = COPY killed renamable $x14
; RV32I-ZILSD-4BYTEALIGN-NEXT: $x14 = COPY $x0
; RV32I-ZILSD-4BYTEALIGN-NEXT: $x15 = COPY $x0
; RV32I-ZILSD-4BYTEALIGN-NEXT: PseudoCALL target-flags(riscv-call) @__atomic_compare_exchange_8, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit-def $x2, implicit-def dead $x10
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x10_x11 = LD_RV32 $x2, 16 :: (dereferenceable load (s64) from %ir.1)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: SD_RV32 killed renamable $x10_x11, $x2, 8 :: (store (s64) into %stack.1, align 4)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x30, renamable $x5 = PseudoLD_RV32_OPT $x2, 0 :: (dereferenceable load (s64) from %ir.1)
; RV32I-ZILSD-4BYTEALIGN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $x6, 12 /* clobber */, implicit-def dead early-clobber $x7, 12 /* clobber */, implicit-def dead early-clobber $x8, 12 /* clobber */, implicit-def dead early-clobber $x9, 12 /* clobber */, implicit-def dead early-clobber $x10, 12 /* clobber */, implicit-def dead early-clobber $x11, 12 /* clobber */, implicit-def dead early-clobber $x12, 12 /* clobber */, implicit-def dead early-clobber $x13, 12 /* clobber */, implicit-def dead early-clobber $x14, 12 /* clobber */, implicit-def dead early-clobber $x15, 12 /* clobber */, implicit-def dead early-clobber $x16, 12 /* clobber */, implicit-def dead early-clobber $x17, 12 /* clobber */, implicit-def dead early-clobber $x18, 12 /* clobber */, implicit-def dead early-clobber $x19, 12 /* clobber */, implicit-def dead early-clobber $x20, 12 /* clobber */, implicit-def dead early-clobber $x21, 12 /* clobber */, implicit-def dead early-clobber $x22, 12 /* clobber */, implicit-def dead early-clobber $x23, 12 /* clobber */, implicit-def dead early-clobber $x24, 12 /* clobber */, implicit-def dead early-clobber $x25, 12 /* clobber */, implicit-def dead early-clobber $x26, 12 /* clobber */, implicit-def dead early-clobber $x27, 12 /* clobber */, implicit-def dead early-clobber $x28, 12 /* clobber */, implicit-def dead early-clobber $x29, 12 /* clobber */, implicit-def dead early-clobber $x31
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x12_x13 = LD_RV32 $x2, 8 :: (load (s64) from %stack.1, align 4)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x10 = ADD renamable $x12, renamable $x12
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x11 = SLTU renamable $x10, renamable $x12
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x12 = ADD killed renamable $x13, renamable $x13
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x11 = ADD killed renamable $x12, killed renamable $x11
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x1 = frame-destroy LW $x2, 76 :: (load (s32) from %stack.2)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x8_x9 = frame-destroy LD_RV32 $x2, 68 :: (load (s64) from %stack.3, align 4)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x18_x19 = frame-destroy LD_RV32 $x2, 60 :: (load (s64) from %stack.4, align 4)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x20_x21 = frame-destroy LD_RV32 $x2, 52 :: (load (s64) from %stack.5, align 4)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x22_x23 = frame-destroy LD_RV32 $x2, 44 :: (load (s64) from %stack.6, align 4)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x24_x25 = frame-destroy LD_RV32 $x2, 36 :: (load (s64) from %stack.7, align 4)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x26_x27 = frame-destroy LD_RV32 $x2, 28 :: (load (s64) from %stack.8, align 4)
- ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x2 = frame-destroy ADDI $x2, 80
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x10 = ADD renamable $x30, renamable $x30
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x11 = SLTU renamable $x10, killed renamable $x30
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x5 = ADD killed renamable $x5, renamable $x5
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: renamable $x11 = ADD killed renamable $x5, killed renamable $x11
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x1 = frame-destroy LW $x2, 60 :: (load (s32) from %stack.1)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x8_x9 = frame-destroy LD_RV32 $x2, 52 :: (load (s64) from %stack.2, align 4)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x18_x19 = frame-destroy LD_RV32 $x2, 44 :: (load (s64) from %stack.3, align 4)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x20_x21 = frame-destroy LD_RV32 $x2, 36 :: (load (s64) from %stack.4, align 4)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x22_x23 = frame-destroy LD_RV32 $x2, 28 :: (load (s64) from %stack.5, align 4)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x24_x25 = frame-destroy LD_RV32 $x2, 20 :: (load (s64) from %stack.6, align 4)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x26_x27 = frame-destroy LD_RV32 $x2, 12 :: (load (s64) from %stack.7, align 4)
+ ; RV32I-ZILSD-4BYTEALIGN-NEXT: $x2 = frame-destroy ADDI $x2, 64
; RV32I-ZILSD-4BYTEALIGN-NEXT: PseudoRET implicit $x10, implicit $x11
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
%1 = extractvalue { i64, i1 } %res, 0
>From 33cb6b67156606972a8f4e7cfd50bad406751b18 Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Sat, 28 Mar 2026 12:09:56 -0700
Subject: [PATCH 13/16] Zilsd Pseudos for FoldMemOffset
---
llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp | 30 +++++++++++++++----
.../CodeGen/RISCV/fold-mem-offset-zilsd.ll | 20 +++++--------
2 files changed, 32 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp b/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp
index 87c08a500545a..c3e557b9ab2e3 100644
--- a/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp
@@ -181,28 +181,40 @@ bool RISCVFoldMemOffset::foldOffset(
case RISCV::FSW:
case RISCV::LD:
case RISCV::LD_RV32:
+ case RISCV::PseudoLD_RV32_OPT:
case RISCV::FLD:
case RISCV::SD:
case RISCV::SD_RV32:
+ case RISCV::PseudoSD_RV32_OPT:
case RISCV::FSD: {
// Can't fold into store value.
if (User.getOperand(0).getReg() == Reg)
return false;
+ unsigned AddrRegIdx = 1, OffsetIdx = 2;
+ if (User.getOpcode() == RISCV::PseudoLD_RV32_OPT ||
+ User.getOpcode() == RISCV::PseudoSD_RV32_OPT) {
+ AddrRegIdx = 2;
+ OffsetIdx = 3;
+
+ if (User.getOperand(1).getReg() == Reg)
+ return false;
+ }
+
// Existing offset must be immediate.
- if (!User.getOperand(2).isImm())
+ if (!User.getOperand(OffsetIdx).isImm())
return false;
// Require at least one operation between the ADDI and the load/store.
// We have other optimizations that should handle the simple case.
- if (User.getOperand(1).getReg() == OrigReg)
+ if (User.getOperand(AddrRegIdx).getReg() == OrigReg)
return false;
- auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
+ auto I = RegToOffsetMap.find(User.getOperand(AddrRegIdx).getReg());
if (I == RegToOffsetMap.end())
return false;
- int64_t LocalOffset = User.getOperand(2).getImm();
+ int64_t LocalOffset = User.getOperand(OffsetIdx).getImm();
assert(isInt<12>(LocalOffset));
int64_t CombinedOffset = (uint64_t)LocalOffset + (uint64_t)I->second;
if (!isInt<12>(CombinedOffset))
@@ -272,8 +284,14 @@ bool RISCVFoldMemOffset::runOnMachineFunction(MachineFunction &MF) {
// We can fold this ADDI.
// Rewrite all the instructions.
- for (auto [MemMI, NewOffset] : FoldableInstrs)
- MemMI->getOperand(2).setImm(NewOffset);
+ for (auto [MemMI, NewOffset] : FoldableInstrs) {
+ unsigned OffsetIdx = 2;
+ if (MemMI->getOpcode() == RISCV::PseudoLD_RV32_OPT ||
+ MemMI->getOpcode() == RISCV::PseudoSD_RV32_OPT)
+ OffsetIdx = 3;
+
+ MemMI->getOperand(OffsetIdx).setImm(NewOffset);
+ }
MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
MRI.clearKillFlags(MI.getOperand(1).getReg());
diff --git a/llvm/test/CodeGen/RISCV/fold-mem-offset-zilsd.ll b/llvm/test/CodeGen/RISCV/fold-mem-offset-zilsd.ll
index 519e458145837..60ba53fb7ddb9 100644
--- a/llvm/test/CodeGen/RISCV/fold-mem-offset-zilsd.ll
+++ b/llvm/test/CodeGen/RISCV/fold-mem-offset-zilsd.ll
@@ -5,13 +5,12 @@
define i64 @test_sh3add_load(ptr %p, i32 %x, i32 %y) {
; RV32ZILSD-LABEL: test_sh3add_load:
; RV32ZILSD: # %bb.0: # %entry
-; RV32ZILSD-NEXT: addi a0, a0, 400
; RV32ZILSD-NEXT: slli a1, a1, 3
; RV32ZILSD-NEXT: slli a2, a2, 3
; RV32ZILSD-NEXT: add a1, a1, a0
; RV32ZILSD-NEXT: add a0, a0, a2
-; RV32ZILSD-NEXT: ld a2, 80(a1)
-; RV32ZILSD-NEXT: ld a4, 0(a0)
+; RV32ZILSD-NEXT: ld a2, 480(a1)
+; RV32ZILSD-NEXT: ld a4, 400(a0)
; RV32ZILSD-NEXT: add a1, a5, a3
; RV32ZILSD-NEXT: add a0, a4, a2
; RV32ZILSD-NEXT: sltu a2, a0, a4
@@ -20,11 +19,10 @@ define i64 @test_sh3add_load(ptr %p, i32 %x, i32 %y) {
;
; RV32ZILSDZBA-LABEL: test_sh3add_load:
; RV32ZILSDZBA: # %bb.0: # %entry
-; RV32ZILSDZBA-NEXT: addi a0, a0, 400
; RV32ZILSDZBA-NEXT: sh3add a1, a1, a0
; RV32ZILSDZBA-NEXT: sh3add a0, a2, a0
-; RV32ZILSDZBA-NEXT: ld a2, 80(a1)
-; RV32ZILSDZBA-NEXT: ld a4, 0(a0)
+; RV32ZILSDZBA-NEXT: ld a2, 480(a1)
+; RV32ZILSDZBA-NEXT: ld a4, 400(a0)
; RV32ZILSDZBA-NEXT: add a1, a5, a3
; RV32ZILSDZBA-NEXT: add a0, a4, a2
; RV32ZILSDZBA-NEXT: sltu a2, a0, a4
@@ -87,13 +85,12 @@ define void @test_sh3add_store(ptr %p, i64 %x, i64 %y) {
; RV32ZILSD-NEXT: mv a4, a3
; RV32ZILSD-NEXT: mv a3, a2
; RV32ZILSD-NEXT: mv a2, a1
-; RV32ZILSD-NEXT: addi a0, a0, 400
; RV32ZILSD-NEXT: slli a1, a1, 3
; RV32ZILSD-NEXT: slli a6, a4, 3
; RV32ZILSD-NEXT: add a1, a0, a1
; RV32ZILSD-NEXT: add a0, a0, a6
-; RV32ZILSD-NEXT: sd a2, 0(a1)
-; RV32ZILSD-NEXT: sd a4, 0(a0)
+; RV32ZILSD-NEXT: sd a2, 400(a1)
+; RV32ZILSD-NEXT: sd a4, 400(a0)
; RV32ZILSD-NEXT: ret
;
; RV32ZILSDZBA-LABEL: test_sh3add_store:
@@ -102,11 +99,10 @@ define void @test_sh3add_store(ptr %p, i64 %x, i64 %y) {
; RV32ZILSDZBA-NEXT: mv a4, a3
; RV32ZILSDZBA-NEXT: mv a3, a2
; RV32ZILSDZBA-NEXT: mv a2, a1
-; RV32ZILSDZBA-NEXT: addi a0, a0, 400
; RV32ZILSDZBA-NEXT: sh3add a1, a1, a0
; RV32ZILSDZBA-NEXT: sh3add a0, a4, a0
-; RV32ZILSDZBA-NEXT: sd a2, 0(a1)
-; RV32ZILSDZBA-NEXT: sd a4, 0(a0)
+; RV32ZILSDZBA-NEXT: sd a2, 400(a1)
+; RV32ZILSDZBA-NEXT: sd a4, 400(a0)
; RV32ZILSDZBA-NEXT: ret
entry:
%b = getelementptr inbounds nuw i8, ptr %p, i64 400
>From 97a3a104b6ac560d7346564548f7f9465af787e0 Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Sat, 28 Mar 2026 12:27:42 -0700
Subject: [PATCH 14/16] Pre-commit merge-base-offset-zilsd.ll changes
---
.../RISCV/fold-addi-loadstore-zilsd.ll | 251 +++++++++++++++++-
1 file changed, 245 insertions(+), 6 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore-zilsd.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore-zilsd.ll
index 7ad24b1be9fbc..074ff2880840d 100644
--- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore-zilsd.ll
+++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore-zilsd.ll
@@ -28,6 +28,245 @@ entry:
ret void
}
+ at h_0 = dso_local global i64 0
+ at h_1 = dso_local global i64 0, align 1
+ at h_2 = dso_local global i64 0, align 2
+ at h_4 = dso_local global i64 0, align 4
+ at h_8 = dso_local global i64 0, align 8
+ at h_16 = dso_local global i64 0, align 16
+
+define dso_local i64 @load_h_0() nounwind {
+; CHECK-LABEL: load_h_0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .Lpcrel_hi2:
+; CHECK-NEXT: auipc a0, %pcrel_hi(h_0)
+; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi2)
+; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr @h_0
+ ret i64 %0
+}
+
+define dso_local i64 @load_h_1() nounwind {
+; CHECK-LABEL: load_h_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .Lpcrel_hi3:
+; CHECK-NEXT: auipc a0, %pcrel_hi(h_1)
+; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi3)
+; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr @h_1
+ ret i64 %0
+}
+
+define dso_local i64 @load_h_2() nounwind {
+; CHECK-LABEL: load_h_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .Lpcrel_hi4:
+; CHECK-NEXT: auipc a0, %pcrel_hi(h_2)
+; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi4)
+; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr @h_2
+ ret i64 %0
+}
+
+define dso_local i64 @load_h_4() nounwind {
+; CHECK-LABEL: load_h_4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .Lpcrel_hi5:
+; CHECK-NEXT: auipc a0, %pcrel_hi(h_4)
+; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi5)
+; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr @h_4
+ ret i64 %0
+}
+
+define dso_local i64 @load_h_8() nounwind {
+; CHECK-LABEL: load_h_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .Lpcrel_hi6:
+; CHECK-NEXT: auipc a0, %pcrel_hi(h_8)
+; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi6)
+; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr @h_8
+ ret i64 %0
+}
+
+define dso_local i64 @load_h_16() nounwind {
+; CHECK-LABEL: load_h_16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .Lpcrel_hi7:
+; CHECK-NEXT: auipc a0, %pcrel_hi(h_16)
+; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi7)
+; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr @h_16
+ ret i64 %0
+}
+
+define dso_local void @store_h_4() nounwind {
+; CHECK-LABEL: store_h_4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .Lpcrel_hi8:
+; CHECK-NEXT: auipc a0, %pcrel_hi(h_4)
+; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi8)
+; CHECK-NEXT: sd zero, 0(a0)
+; CHECK-NEXT: ret
+entry:
+ store i64 0, ptr @h_4
+ ret void
+}
+
+define dso_local void @store_h_8() nounwind {
+; CHECK-LABEL: store_h_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .Lpcrel_hi9:
+; CHECK-NEXT: auipc a0, %pcrel_hi(h_8)
+; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi9)
+; CHECK-NEXT: sd zero, 0(a0)
+; CHECK-NEXT: ret
+entry:
+ store i64 0, ptr @h_8
+ ret void
+}
+
+ at ga_8 = dso_local local_unnamed_addr global [2 x i64] zeroinitializer, align 8
+ at ga_16 = dso_local local_unnamed_addr global [2 x i64] zeroinitializer, align 16
+
+define dso_local i64 @load_ga_8() nounwind {
+; CHECK-LABEL: load_ga_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .Lpcrel_hi10:
+; CHECK-NEXT: auipc a0, %pcrel_hi(ga_8)
+; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi10)
+; CHECK-NEXT: ld a0, 8(a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr getelementptr inbounds ([2 x i64], ptr @ga_8, i32 0, i32 1)
+ ret i64 %0
+}
+
+define dso_local i64 @load_ga_16() nounwind {
+; CHECK-LABEL: load_ga_16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .Lpcrel_hi11:
+; CHECK-NEXT: auipc a0, %pcrel_hi(ga_16)
+; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi11)
+; CHECK-NEXT: ld a0, 8(a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr getelementptr inbounds ([2 x i64], ptr @ga_16, i32 0, i32 1)
+ ret i64 %0
+}
+
+; Check if we can fold ADDI into the offset of store instructions,
+; when the store instruction is the root node in the DAG.
+
+ at g_4_i64 = global i64 0, align 4
+
+define dso_local void @inc_g_i64() nounwind {
+; CHECK-LABEL: inc_g_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .Lpcrel_hi12:
+; CHECK-NEXT: auipc a0, %pcrel_hi(g_4_i64)
+; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi12)
+; CHECK-NEXT: ld a2, 0(a0)
+; CHECK-NEXT: addi a2, a2, 1
+; CHECK-NEXT: seqz a1, a2
+; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: sd a2, 0(a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr @g_4_i64
+ %inc = add i64 %0, 1
+ store i64 %inc, ptr @g_4_i64
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+ at tl_4 = dso_local thread_local global i64 0, align 4
+ at tl_8 = dso_local thread_local global i64 0, align 8
+
+define dso_local i64 @load_tl_4() nounwind {
+; CHECK-LABEL: load_tl_4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a0, %tprel_hi(tl_4)
+; CHECK-NEXT: add a0, a0, tp, %tprel_add(tl_4)
+; CHECK-NEXT: ld a0, %tprel_lo(tl_4)(a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr @tl_4
+ ret i64 %0
+}
+
+define dso_local i64 @load_tl_8() nounwind {
+; CHECK-LABEL: load_tl_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a0, %tprel_hi(tl_8)
+; CHECK-NEXT: add a0, a0, tp, %tprel_add(tl_8)
+; CHECK-NEXT: ld a0, %tprel_lo(tl_8)(a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr @tl_8
+ ret i64 %0
+}
+
+define dso_local i64 @load_const_ok() nounwind {
+; CHECK-LABEL: load_const_ok:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld a0, 2040(zero)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr inttoptr (i32 2040 to ptr)
+ ret i64 %0
+}
+
+define dso_local i64 @load_cost_overflow() nounwind {
+; CHECK-LABEL: load_cost_overflow:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld a0, 2044(zero)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr inttoptr (i64 2044 to ptr)
+ ret i64 %0
+}
+
+define dso_local i64 @load_const_medium() nounwind {
+; CHECK-LABEL: load_const_medium:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: ld a0, -16(a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr inttoptr (i64 4080 to ptr)
+ ret i64 %0
+}
+
+; The constant here is 0x7ffff800, this value requires LUI+ADDIW on RV64,
+; LUI+ADDI would produce a different constant so we can't fold into the load
+; offset.
+define dso_local i64 @load_const_large() nounwind {
+; CHECK-LABEL: load_const_large:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a0, 524288
+; CHECK-NEXT: ld a0, -2048(a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = load i64, ptr inttoptr (i64 2147481600 to ptr)
+ ret i64 %0
+}
+
%struct.S = type { double, double }
define double @fold_addi_from_different_bb(i32 %k, i32 %n, ptr %a) nounwind {
@@ -38,25 +277,25 @@ define double @fold_addi_from_different_bb(i32 %k, i32 %n, ptr %a) nounwind {
; CHECK-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: blez a1, .LBB2_3
+; CHECK-NEXT: blez a1, .LBB19_3
; CHECK-NEXT: # %bb.1: # %for.body.lr.ph
; CHECK-NEXT: mv s2, a2
; CHECK-NEXT: mv s3, a1
; CHECK-NEXT: fmv.d s0, zero
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add s4, a2, a0
-; CHECK-NEXT: .LBB2_2: # %for.body
+; CHECK-NEXT: .LBB19_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: mv a0, s2
; CHECK-NEXT: call f
; CHECK-NEXT: ld a0, 8(s4)
; CHECK-NEXT: addi s3, s3, -1
; CHECK-NEXT: fadd.d s0, a0, s0
-; CHECK-NEXT: bnez s3, .LBB2_2
-; CHECK-NEXT: j .LBB2_4
-; CHECK-NEXT: .LBB2_3:
+; CHECK-NEXT: bnez s3, .LBB19_2
+; CHECK-NEXT: j .LBB19_4
+; CHECK-NEXT: .LBB19_3:
; CHECK-NEXT: fmv.d s0, zero
-; CHECK-NEXT: .LBB2_4: # %for.cond.cleanup
+; CHECK-NEXT: .LBB19_4: # %for.cond.cleanup
; CHECK-NEXT: fmv.d a0, s0
; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
>From dfaa10e0e7472b1a3eaf83167879e84d8fdcf0c8 Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Sat, 28 Mar 2026 12:40:46 -0700
Subject: [PATCH 15/16] Zilsd Pseudos in MergeBaseOffset
---
.../lib/Target/RISCV/RISCVMergeBaseOffset.cpp | 18 +++++++++
.../RISCV/fold-addi-loadstore-zilsd.ll | 39 +++++++------------
2 files changed, 32 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 503e95fbd23f7..cd0318e096399 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -474,6 +474,22 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
CommonOffset = Offset;
break;
}
+ case RISCV::PseudoLD_RV32_OPT:
+ case RISCV::PseudoSD_RV32_OPT: {
+ if (UseMI.getOperand(2).isFI())
+ return false;
+ // Register defined by Lo should not be the value register.
+ if (DestReg == UseMI.getOperand(0).getReg() || DestReg == UseMI.getOperand(1).getReg() )
+ return false;
+ assert(DestReg == UseMI.getOperand(2).getReg() &&
+ "Expected base address use");
+ // All load/store instructions must use the same offset.
+ int64_t Offset = UseMI.getOperand(3).getImm();
+ if (CommonOffset && Offset != CommonOffset)
+ return false;
+ CommonOffset = Offset;
+ break;
+ }
case RISCV::PseudoCCLD:
case RISCV::PseudoCCLW:
case RISCV::PseudoCCLWU:
@@ -604,6 +620,8 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
case RISCV::PseudoCCLHU:
case RISCV::PseudoCCLB:
case RISCV::PseudoCCLBU:
+ case RISCV::PseudoLD_RV32_OPT:
+ case RISCV::PseudoSD_RV32_OPT:
ImmIdx = 3;
break;
case RISCV::LB:
diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore-zilsd.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore-zilsd.ll
index 074ff2880840d..a9ca098429d87 100644
--- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore-zilsd.ll
+++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore-zilsd.ll
@@ -40,8 +40,7 @@ define dso_local i64 @load_h_0() nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .Lpcrel_hi2:
; CHECK-NEXT: auipc a0, %pcrel_hi(h_0)
-; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi2)
-; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi2)(a0)
; CHECK-NEXT: ret
entry:
%0 = load i64, ptr @h_0
@@ -53,8 +52,7 @@ define dso_local i64 @load_h_1() nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .Lpcrel_hi3:
; CHECK-NEXT: auipc a0, %pcrel_hi(h_1)
-; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi3)
-; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0)
; CHECK-NEXT: ret
entry:
%0 = load i64, ptr @h_1
@@ -66,8 +64,7 @@ define dso_local i64 @load_h_2() nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .Lpcrel_hi4:
; CHECK-NEXT: auipc a0, %pcrel_hi(h_2)
-; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi4)
-; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0)
; CHECK-NEXT: ret
entry:
%0 = load i64, ptr @h_2
@@ -79,8 +76,7 @@ define dso_local i64 @load_h_4() nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .Lpcrel_hi5:
; CHECK-NEXT: auipc a0, %pcrel_hi(h_4)
-; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi5)
-; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi5)(a0)
; CHECK-NEXT: ret
entry:
%0 = load i64, ptr @h_4
@@ -92,8 +88,7 @@ define dso_local i64 @load_h_8() nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .Lpcrel_hi6:
; CHECK-NEXT: auipc a0, %pcrel_hi(h_8)
-; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi6)
-; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi6)(a0)
; CHECK-NEXT: ret
entry:
%0 = load i64, ptr @h_8
@@ -105,8 +100,7 @@ define dso_local i64 @load_h_16() nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .Lpcrel_hi7:
; CHECK-NEXT: auipc a0, %pcrel_hi(h_16)
-; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi7)
-; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi7)(a0)
; CHECK-NEXT: ret
entry:
%0 = load i64, ptr @h_16
@@ -118,8 +112,7 @@ define dso_local void @store_h_4() nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .Lpcrel_hi8:
; CHECK-NEXT: auipc a0, %pcrel_hi(h_4)
-; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi8)
-; CHECK-NEXT: sd zero, 0(a0)
+; CHECK-NEXT: sd zero, %pcrel_lo(.Lpcrel_hi8)(a0)
; CHECK-NEXT: ret
entry:
store i64 0, ptr @h_4
@@ -131,8 +124,7 @@ define dso_local void @store_h_8() nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .Lpcrel_hi9:
; CHECK-NEXT: auipc a0, %pcrel_hi(h_8)
-; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi9)
-; CHECK-NEXT: sd zero, 0(a0)
+; CHECK-NEXT: sd zero, %pcrel_lo(.Lpcrel_hi9)(a0)
; CHECK-NEXT: ret
entry:
store i64 0, ptr @h_8
@@ -146,9 +138,8 @@ define dso_local i64 @load_ga_8() nounwind {
; CHECK-LABEL: load_ga_8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .Lpcrel_hi10:
-; CHECK-NEXT: auipc a0, %pcrel_hi(ga_8)
-; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi10)
-; CHECK-NEXT: ld a0, 8(a0)
+; CHECK-NEXT: auipc a0, %pcrel_hi(ga_8+8)
+; CHECK-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi10)(a0)
; CHECK-NEXT: ret
entry:
%0 = load i64, ptr getelementptr inbounds ([2 x i64], ptr @ga_8, i32 0, i32 1)
@@ -159,9 +150,8 @@ define dso_local i64 @load_ga_16() nounwind {
; CHECK-LABEL: load_ga_16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .Lpcrel_hi11:
-; CHECK-NEXT: auipc a0, %pcrel_hi(ga_16)
-; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi11)
-; CHECK-NEXT: ld a0, 8(a0)
+; CHECK-NEXT: auipc a0, %pcrel_hi(ga_16+8)
+; CHECK-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi11)(a0)
; CHECK-NEXT: ret
entry:
%0 = load i64, ptr getelementptr inbounds ([2 x i64], ptr @ga_16, i32 0, i32 1)
@@ -178,12 +168,11 @@ define dso_local void @inc_g_i64() nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .Lpcrel_hi12:
; CHECK-NEXT: auipc a0, %pcrel_hi(g_4_i64)
-; CHECK-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi12)
-; CHECK-NEXT: ld a2, 0(a0)
+; CHECK-NEXT: ld a2, %pcrel_lo(.Lpcrel_hi12)(a0)
; CHECK-NEXT: addi a2, a2, 1
; CHECK-NEXT: seqz a1, a2
; CHECK-NEXT: add a3, a3, a1
-; CHECK-NEXT: sd a2, 0(a0)
+; CHECK-NEXT: sd a2, %pcrel_lo(.Lpcrel_hi12)(a0)
; CHECK-NEXT: ret
entry:
%0 = load i64, ptr @g_4_i64
>From 0a09cc78c280e71e3e110de976fcce6886e70cbe Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Sat, 28 Mar 2026 13:35:32 -0700
Subject: [PATCH 16/16] clang-format
---
llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index cd0318e096399..b8bc72967da6b 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -479,7 +479,8 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
if (UseMI.getOperand(2).isFI())
return false;
// Register defined by Lo should not be the value register.
- if (DestReg == UseMI.getOperand(0).getReg() || DestReg == UseMI.getOperand(1).getReg() )
+ if (DestReg == UseMI.getOperand(0).getReg() ||
+ DestReg == UseMI.getOperand(1).getReg())
return false;
assert(DestReg == UseMI.getOperand(2).getReg() &&
"Expected base address use");
More information about the llvm-commits
mailing list