[llvm] [RISCV] Expand multiplication by `(2/4/8 * 3/5/9 + 1) << N` with SHL_ADD (PR #166372)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 4 05:59:41 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Piotr Fusik (pfusik)
---
Full diff: https://github.com/llvm/llvm-project/pull/166372.diff
3 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+29-23)
- (modified) llvm/test/CodeGen/RISCV/rv64xtheadba.ll (+13-6)
- (modified) llvm/test/CodeGen/RISCV/rv64zba.ll (+203-6)
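In short, the patch extends expandMulToShlAddShlAdd so that constants of the form 2/4/8 * 3/5/9 + 1 (optionally shifted left by N) lower to two shNadd instructions instead of a materialized constant plus mul; the new AddX flag makes the second shNadd add back the original operand X rather than the intermediate product. Below is a minimal standalone sketch (not LLVM code) of the identity this relies on; `shadd` is a hypothetical helper modeling Zba's shNadd instruction:

```cpp
// For shift amounts X, Y in {1, 2, 3},
//   shXadd(shYadd(a, a), a) == ((2^Y + 1) * 2^X + 1) * a,
// which covers every multiplier of the form 2/4/8 * 3/5/9 + 1.
#include <cassert>
#include <cstdint>

// Hypothetical helper modeling Zba's shNadd instruction: (a << n) + b.
static uint64_t shadd(unsigned n, uint64_t a, uint64_t b) {
  return (a << n) + b;
}

int main() {
  const uint64_t a = 12345;
  for (unsigned y = 1; y <= 3; ++y) {   // inner factor 3/5/9 = 2^y + 1
    for (unsigned x = 1; x <= 3; ++x) { // outer factor 2/4/8 = 2^x
      uint64_t mul359 = shadd(y, a, a);      // (2^y + 1) * a
      uint64_t result = shadd(x, mul359, a); // 2^x * mul359 + a
      assert(result == (((1u << y) + 1) * (1u << x) + 1) * a);
    }
  }
  return 0;
}
```

The lone exception encoded in the switch is 8 * 3 + 1 = 25: the patch keeps the existing 5 * 5 lowering for it, since that form does not tie up a register holding X.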
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c56ce3fd2a5a4..b597cbf8b2ba3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16496,29 +16496,47 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
}
static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
- unsigned ShY) {
+ unsigned ShY, bool AddX) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue X = N->getOperand(0);
SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
DAG.getConstant(ShY, DL, VT), X);
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getConstant(ShX, DL, VT), Mul359);
+ DAG.getConstant(ShX, DL, VT), AddX ? X : Mul359);
}
static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
uint64_t MulAmt) {
switch (MulAmt) {
case 5 * 3:
- return getShlAddShlAdd(N, DAG, 2, 1);
+ return getShlAddShlAdd(N, DAG, 2, 1, false);
case 9 * 3:
- return getShlAddShlAdd(N, DAG, 3, 1);
+ return getShlAddShlAdd(N, DAG, 3, 1, false);
case 5 * 5:
- return getShlAddShlAdd(N, DAG, 2, 2);
+ return getShlAddShlAdd(N, DAG, 2, 2, false);
case 9 * 5:
- return getShlAddShlAdd(N, DAG, 3, 2);
+ return getShlAddShlAdd(N, DAG, 3, 2, false);
case 9 * 9:
- return getShlAddShlAdd(N, DAG, 3, 3);
+ return getShlAddShlAdd(N, DAG, 3, 3, false);
+ case 2 * 3 + 1:
+ return getShlAddShlAdd(N, DAG, 1, 1, true);
+ case 4 * 3 + 1:
+ return getShlAddShlAdd(N, DAG, 2, 1, true);
+ // case 8 * 3 + 1:
+ // Prefer 5 * 5 above because it doesn't require a register to hold X.
+ case 2 * 5 + 1:
+ return getShlAddShlAdd(N, DAG, 1, 2, true);
+ case 4 * 5 + 1:
+ return getShlAddShlAdd(N, DAG, 2, 2, true);
+ case 8 * 5 + 1:
+ return getShlAddShlAdd(N, DAG, 3, 2, true);
+ case 2 * 9 + 1:
+ return getShlAddShlAdd(N, DAG, 1, 3, true);
+ case 4 * 9 + 1:
+ return getShlAddShlAdd(N, DAG, 2, 3, true);
+ case 8 * 9 + 1:
+ return getShlAddShlAdd(N, DAG, 3, 3, true);
default:
return SDValue();
}
@@ -16581,7 +16599,8 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(Shift, DL, VT));
}
- // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
+ // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
+ // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt))
return V;
@@ -16600,21 +16619,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
}
}
- // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
- // This is the two instruction form, there are also three instruction
- // variants we could implement. e.g.
- // (2^(1,2,3) * 3,5,9 + 1) << C2
- // 2^(C1>3) * 3,5,9 +/- 1
- if (int ShXAmount = isShifted359(MulAmt - 1, Shift)) {
- assert(Shift != 0 && "MulAmt=4,6,10 handled before");
- if (Shift <= 3) {
- SDLoc DL(N);
- SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getConstant(ShXAmount, DL, VT), X);
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getConstant(Shift, DL, VT), X);
- }
- }
+ // TODO: 2^(C1>3) * 3,5,9 +/- 1
// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
@@ -16648,6 +16653,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
// 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
// of 25 which happen to be quite common.
+ // (2/4/8 * 3/5/9 + 1) * 2^N
Shift = llvm::countr_zero(MulAmt);
if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
SDLoc DL(N);
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
index 50bd22bf5fd69..f4964288e3541 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
@@ -205,12 +205,19 @@ define i64 @addmul20(i64 %a, i64 %b) {
}
define i64 @addmul22(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul22:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 22
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul22:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 22
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBA-LABEL: addmul22:
+; RV64XTHEADBA: # %bb.0:
+; RV64XTHEADBA-NEXT: th.addsl a2, a0, a0, 2
+; RV64XTHEADBA-NEXT: th.addsl a0, a0, a2, 1
+; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 1
+; RV64XTHEADBA-NEXT: ret
%c = mul i64 %a, 22
%d = add i64 %c, %b
ret i64 %d
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 7fd76262d547a..d4b228828c04d 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -585,6 +585,33 @@ define i64 @addmul12(i64 %a, i64 %b) {
ret i64 %d
}
+define i64 @addmul14(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul14:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a2, a0, 1
+; RV64I-NEXT: slli a0, a0, 4
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul14:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh1add a2, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul14:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 14
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
define i64 @addmul18(i64 %a, i64 %b) {
; RV64I-LABEL: addmul18:
; RV64I: # %bb.0:
@@ -636,12 +663,26 @@ define i64 @addmul20(i64 %a, i64 %b) {
}
define i64 @addmul22(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul22:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 22
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul22:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 22
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul22:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a2, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul22:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 22
%d = add i64 %c, %b
ret i64 %d
@@ -672,6 +713,32 @@ define i64 @addmul24(i64 %a, i64 %b) {
ret i64 %d
}
+define i64 @addmul26(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul26:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 26
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul26:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh1add a2, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul26:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 26
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
define i64 @addmul36(i64 %a, i64 %b) {
; RV64I-LABEL: addmul36:
; RV64I: # %bb.0:
@@ -722,6 +789,58 @@ define i64 @addmul40(i64 %a, i64 %b) {
ret i64 %d
}
+define i64 @addmul38(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul38:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 38
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul38:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a2, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul38:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 38
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
+define i64 @addmul42(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul42:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 42
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul42:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a2, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul42:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 42
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
define i64 @addmul72(i64 %a, i64 %b) {
; RV64I-LABEL: addmul72:
; RV64I: # %bb.0:
@@ -747,6 +866,84 @@ define i64 @addmul72(i64 %a, i64 %b) {
ret i64 %d
}
+define i64 @addmul74(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul74:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 74
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul74:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a2, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul74:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 74
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
+define i64 @addmul82(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul82:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 82
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul82:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a2, a0, a0
+; RV64ZBA-NEXT: sh3add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul82:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 82
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
+define i64 @addmul146(i64 %a, i64 %b) {
+; RV64I-LABEL: addmul146:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 146
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul146:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a2, a0, a0
+; RV64ZBA-NEXT: sh3add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul146:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
+ %c = mul i64 %a, 146
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
define i64 @mul50(i64 %a) {
; RV64I-LABEL: mul50:
; RV64I: # %bb.0:
``````````
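For a concrete instance, the new RV64ZBA check lines for addmul22 compute a * 22 + b as three shNadd ops, using 22 = (2 * 5 + 1) * 2 with the final doubling folded into the add of b. A small C++ trace of that sequence (again only a sketch; `shadd` is the same hypothetical helper as above):

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical model of Zba's shNadd: (a << n) + b.
static uint64_t shadd(unsigned n, uint64_t a, uint64_t b) {
  return (a << n) + b;
}

// Mirrors the three instructions in the RV64ZBA addmul22 output above.
uint64_t addmul22(uint64_t a, uint64_t b) {
  uint64_t t = shadd(2, a, a); // sh2add a2, a0, a0 -> 5*a
  uint64_t u = shadd(1, t, a); // sh1add a0, a2, a0 -> 11*a (11 = 2*5 + 1)
  return shadd(1, u, b);       // sh1add a0, a0, a1 -> 22*a + b
}

int main() {
  assert(addmul22(7, 3) == 7 * 22 + 3);
  return 0;
}
```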
https://github.com/llvm/llvm-project/pull/166372