[llvm] [RISCV] Add profitability checks to SelectAddrRegRegScale. (PR #150135)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 22 15:47:49 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
-Only fold if the ADD can be folded into all uses.
-Don't reassociate an ADDI if the shl+add can be a shxadd or similar instruction.
-Only reassociate a single ADDI. If there are 2 addis it's the same number of instructions as shl+add. If there are more than 2, it would increase the instruction count compared to folding the addis into the loads/stores.
---
Full diff: https://github.com/llvm/llvm-project/pull/150135.diff
3 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (+68-2)
- (modified) llvm/test/CodeGen/RISCV/xqcisls.ll (+23-24)
- (modified) llvm/test/CodeGen/RISCV/xtheadmemidx.ll (+8-8)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index a541c2fe2654c..5d9a283933990 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3032,6 +3032,63 @@ bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
return true;
}
+/// Return true if this is a load/store that we have a RegRegScale instruction for.
+static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
+ const RISCVSubtarget &Subtarget) {
+ if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
+ return false;
+ EVT VT = cast<MemSDNode>(User)->getMemoryVT();
+ if (!(VT.isScalarInteger() &&
+ (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
+ !((VT == MVT::f32 || VT == MVT::f64) &&
+ Subtarget.hasVendorXTHeadFMemIdx()))
+ return false;
+ // Don't allow stores of the value. It must be used as the address.
+ if (User->getOpcode() == ISD::STORE &&
+ cast<StoreSDNode>(User)->getValue() == Add)
+ return false;
+
+ return true;
+}
+
+/// Is it profitable to fold this Add into RegRegScale load/store. If \p
+/// Shift is non-null, then we have matched a shl+add. We allow reassociating
+/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
+/// single addi and we don't have a SHXADD instruction we could use.
+/// FIXME: May still need to check how many and what kind of users the SHL has.
+static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
+ SDValue Add,
+ SDValue Shift = SDValue()) {
+ bool FoundADDI = false;
+ for (auto *User : Add->users()) {
+ if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
+ continue;
+
+ // Allow a single ADDI that is used by loads/stores if we matched a shift.
+ if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(User->getOperand(1)) ||
+ !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
+ return false;
+
+ FoundADDI = true;
+
+ // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
+ assert(Shift.getOpcode() == ISD::SHL);
+ unsigned ShiftAmt = Shift.getConstantOperandVal(1);
+ if ((ShiftAmt <= 3 &&
+ (Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa())) ||
+ (ShiftAmt >= 4 && ShiftAmt <= 7 && Subtarget.hasVendorXqciac()))
+ return false;
+
+ // All users of the ADDI should be load/store.
+ for (auto *ADDIUser : User->users())
+ if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
+ return false;
+ }
+
+ return true;
+}
+
bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
unsigned MaxShiftAmount,
SDValue &Base, SDValue &Index,
@@ -3062,7 +3119,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
if (LHS.getOpcode() == ISD::ADD &&
!isa<ConstantSDNode>(LHS.getOperand(1)) &&
isInt<12>(C1->getSExtValue())) {
- if (SelectShl(LHS.getOperand(1), Index, Scale)) {
+ if (SelectShl(LHS.getOperand(1), Index, Scale) &&
+ isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
SDLoc(Addr), VT);
Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
@@ -3072,7 +3130,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
}
// Add is commutative so we need to check both operands.
- if (SelectShl(LHS.getOperand(0), Index, Scale)) {
+ if (SelectShl(LHS.getOperand(0), Index, Scale) &&
+ isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
SDLoc(Addr), VT);
Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
@@ -3090,16 +3149,23 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
// Try to match a shift on the RHS.
if (SelectShl(RHS, Index, Scale)) {
+ if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
+ return false;
Base = LHS;
return true;
}
// Try to match a shift on the LHS.
if (SelectShl(LHS, Index, Scale)) {
+ if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
+ return false;
Base = RHS;
return true;
}
+ if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
+ return false;
+
Base = LHS;
Index = RHS;
Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
diff --git a/llvm/test/CodeGen/RISCV/xqcisls.ll b/llvm/test/CodeGen/RISCV/xqcisls.ll
index 709dc4ce074dc..3dea540de4f4e 100644
--- a/llvm/test/CodeGen/RISCV/xqcisls.ll
+++ b/llvm/test/CodeGen/RISCV/xqcisls.ll
@@ -308,13 +308,13 @@ define i64 @lrd(ptr %a, i32 %b) {
;
; RV32IZBAXQCISLS-LABEL: lrd:
; RV32IZBAXQCISLS: # %bb.0:
-; RV32IZBAXQCISLS-NEXT: qc.lrw a2, a0, a1, 3
-; RV32IZBAXQCISLS-NEXT: addi a0, a0, 4
-; RV32IZBAXQCISLS-NEXT: qc.lrw a1, a0, a1, 3
-; RV32IZBAXQCISLS-NEXT: add a0, a2, a2
-; RV32IZBAXQCISLS-NEXT: sltu a2, a0, a2
-; RV32IZBAXQCISLS-NEXT: add a1, a1, a1
-; RV32IZBAXQCISLS-NEXT: add a1, a1, a2
+; RV32IZBAXQCISLS-NEXT: sh3add a0, a1, a0
+; RV32IZBAXQCISLS-NEXT: lw a1, 0(a0)
+; RV32IZBAXQCISLS-NEXT: lw a2, 4(a0)
+; RV32IZBAXQCISLS-NEXT: add a0, a1, a1
+; RV32IZBAXQCISLS-NEXT: sltu a1, a0, a1
+; RV32IZBAXQCISLS-NEXT: add a2, a2, a2
+; RV32IZBAXQCISLS-NEXT: add a1, a2, a1
; RV32IZBAXQCISLS-NEXT: ret
%1 = getelementptr i64, ptr %a, i32 %b
%2 = load i64, ptr %1, align 8
@@ -348,14 +348,13 @@ define i64 @lrd_2(ptr %a, i32 %b) {
;
; RV32IZBAXQCISLS-LABEL: lrd_2:
; RV32IZBAXQCISLS: # %bb.0:
-; RV32IZBAXQCISLS-NEXT: addi a2, a0, 96
-; RV32IZBAXQCISLS-NEXT: qc.lrw a2, a2, a1, 3
-; RV32IZBAXQCISLS-NEXT: addi a0, a0, 100
-; RV32IZBAXQCISLS-NEXT: qc.lrw a1, a0, a1, 3
-; RV32IZBAXQCISLS-NEXT: add a0, a2, a2
-; RV32IZBAXQCISLS-NEXT: sltu a2, a0, a2
-; RV32IZBAXQCISLS-NEXT: add a1, a1, a1
-; RV32IZBAXQCISLS-NEXT: add a1, a1, a2
+; RV32IZBAXQCISLS-NEXT: sh3add a0, a1, a0
+; RV32IZBAXQCISLS-NEXT: lw a1, 96(a0)
+; RV32IZBAXQCISLS-NEXT: lw a2, 100(a0)
+; RV32IZBAXQCISLS-NEXT: add a0, a1, a1
+; RV32IZBAXQCISLS-NEXT: sltu a1, a0, a1
+; RV32IZBAXQCISLS-NEXT: add a2, a2, a2
+; RV32IZBAXQCISLS-NEXT: add a1, a2, a1
; RV32IZBAXQCISLS-NEXT: ret
%1 = add i32 %b, 12
%2 = getelementptr i64, ptr %a, i32 %1
@@ -472,11 +471,11 @@ define void @srd(ptr %a, i32 %b, i64 %c) {
; RV32IZBAXQCISLS: # %bb.0:
; RV32IZBAXQCISLS-NEXT: add a4, a2, a2
; RV32IZBAXQCISLS-NEXT: add a3, a3, a3
-; RV32IZBAXQCISLS-NEXT: sltu a2, a4, a2
-; RV32IZBAXQCISLS-NEXT: qc.srw a4, a0, a1, 3
-; RV32IZBAXQCISLS-NEXT: add a2, a3, a2
-; RV32IZBAXQCISLS-NEXT: addi a0, a0, 4
-; RV32IZBAXQCISLS-NEXT: qc.srw a2, a0, a1, 3
+; RV32IZBAXQCISLS-NEXT: sh3add a0, a1, a0
+; RV32IZBAXQCISLS-NEXT: sltu a1, a4, a2
+; RV32IZBAXQCISLS-NEXT: add a1, a3, a1
+; RV32IZBAXQCISLS-NEXT: sw a4, 0(a0)
+; RV32IZBAXQCISLS-NEXT: sw a1, 4(a0)
; RV32IZBAXQCISLS-NEXT: ret
%1 = add i64 %c, %c
%2 = getelementptr i64, ptr %a, i32 %b
@@ -503,10 +502,10 @@ define i64 @lrd_large_shift(ptr %a, i32 %b) {
;
; RV32IZBAXQCISLS-LABEL: lrd_large_shift:
; RV32IZBAXQCISLS: # %bb.0:
-; RV32IZBAXQCISLS-NEXT: addi a2, a0, 384
-; RV32IZBAXQCISLS-NEXT: addi a3, a0, 388
-; RV32IZBAXQCISLS-NEXT: qc.lrw a0, a2, a1, 5
-; RV32IZBAXQCISLS-NEXT: qc.lrw a1, a3, a1, 5
+; RV32IZBAXQCISLS-NEXT: slli a1, a1, 5
+; RV32IZBAXQCISLS-NEXT: add a1, a1, a0
+; RV32IZBAXQCISLS-NEXT: lw a0, 384(a1)
+; RV32IZBAXQCISLS-NEXT: lw a1, 388(a1)
; RV32IZBAXQCISLS-NEXT: ret
%1 = add i32 %b, 12
%2 = shl i32 %1, 2
diff --git a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
index a20b08aa61c68..9f0f8d9b2b8b7 100644
--- a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
@@ -704,14 +704,14 @@ define i64 @lrd(ptr %a, iXLen %b) {
define i64 @lrd_2(ptr %a, iXLen %b) {
; RV32XTHEADMEMIDX-LABEL: lrd_2:
; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: addi a2, a0, 96
-; RV32XTHEADMEMIDX-NEXT: th.lrw a2, a2, a1, 3
-; RV32XTHEADMEMIDX-NEXT: addi a0, a0, 100
-; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: add a0, a2, a2
-; RV32XTHEADMEMIDX-NEXT: sltu a2, a0, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a1
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
+; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 3
+; RV32XTHEADMEMIDX-NEXT: add a0, a1, a0
+; RV32XTHEADMEMIDX-NEXT: lw a1, 96(a0)
+; RV32XTHEADMEMIDX-NEXT: lw a2, 100(a0)
+; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
+; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
+; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
; RV32XTHEADMEMIDX-NEXT: ret
;
; RV64XTHEADMEMIDX-LABEL: lrd_2:
``````````
</details>
https://github.com/llvm/llvm-project/pull/150135
More information about the llvm-commits
mailing list