[llvm] 28fdda6 - [RISCV] Use SLLI.UW in double-SHL_ADD multiplications (#166728)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 6 21:47:05 PST 2025
Author: Piotr Fusik
Date: 2025-11-07T06:47:01+01:00
New Revision: 28fdda6ae1ba4ae1c5888b1f286cb7fc3703846d
URL: https://github.com/llvm/llvm-project/commit/28fdda6ae1ba4ae1c5888b1f286cb7fc3703846d
DIFF: https://github.com/llvm/llvm-project/commit/28fdda6ae1ba4ae1c5888b1f286cb7fc3703846d.diff
LOG: [RISCV] Use SLLI.UW in double-SHL_ADD multiplications (#166728)
Similarly to muls by 3/5/9 << N, emit the SHL first for other SHL_ADD
multiplications, if it can be folded into SLLI.UW.
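For illustration (not part of the commit): a minimal standalone C++ sketch of
the sequence this patch emits for (zext i32 x) * 44, the zext_mul44 test below.
The helpers slli_uw and shNadd are hypothetical models of the RISC-V Zba
instructions, not LLVM APIs.

#include <cassert>
#include <cstdint>

// slli.uw rd, rs1, N: zero-extend the low 32 bits of rs1, then shift left by N.
static uint64_t slli_uw(uint64_t x, unsigned n) {
  return (x & UINT64_C(0xffffffff)) << n;
}

// shNadd rd, rs1, rs2: (rs1 << N) + rs2, modeling sh1add/sh2add/sh3add.
static uint64_t shNadd(uint64_t rs1, unsigned n, uint64_t rs2) {
  return (rs1 << n) + rs2;
}

int main() {
  for (uint64_t x : {UINT64_C(0), UINT64_C(1), UINT64_C(0x12345678),
                     UINT64_C(0xffffffff), UINT64_C(0xdeadbeefcafe)}) {
    // 44 = 4 * 11 = 4 * (2 * 5 + 1). Doing the shift first lets the
    // zext (the mask with 0xffffffff) fold into a single slli.uw; the
    // two shXadds then build 11 * t on top of the shifted value.
    uint64_t t = slli_uw(x, 2);                  // slli.uw a0, a0, 2
    uint64_t m = shNadd(shNadd(t, 2, t), 1, t);  // sh2add a1, a0, a0
                                                 // sh1add a0, a1, a0
    assert(m == (x & UINT64_C(0xffffffff)) * 44);
  }
  return 0;
}

The same shape covers zext_mul100 below: 100 = 4 * 5 * 5, i.e. slli.uw by 2
followed by two sh2adds. When the operand is not a zero-extended value, the
shift stays last instead, so it can still fold into a following sext or add
(the Shift == 0 early return in getShlAddShlAdd).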
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rv64zba.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3b69edacb8982..c1d38419992b1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16496,30 +16496,50 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
}
static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
- unsigned ShY, bool AddX) {
+ unsigned ShY, bool AddX, unsigned Shift) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue X = N->getOperand(0);
- SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ // Put the shift first if we can fold a zext into the shift forming a slli.uw.
+ using namespace SDPatternMatch;
+ if (Shift != 0 &&
+ sd_match(X, m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))))) {
+ X = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
+ Shift = 0;
+ }
+ SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
DAG.getTargetConstant(ShY, DL, VT), X);
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getTargetConstant(ShX, DL, VT), AddX ? X : Mul359);
+ if (ShX != 0)
+ ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, ShlAdd,
+ DAG.getTargetConstant(ShX, DL, VT), AddX ? X : ShlAdd);
+ if (Shift == 0)
+ return ShlAdd;
+ // Otherwise, put the shl last so that it can fold with following instructions
+ // (e.g. sext or add).
+ return DAG.getNode(ISD::SHL, DL, VT, ShlAdd, DAG.getConstant(Shift, DL, VT));
}
static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
- uint64_t MulAmt) {
- // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
+ uint64_t MulAmt, unsigned Shift) {
switch (MulAmt) {
+ // 3/5/9 -> (shYadd X, X)
+ case 3:
+ return getShlAddShlAdd(N, DAG, 0, 1, /*AddX=*/false, Shift);
+ case 5:
+ return getShlAddShlAdd(N, DAG, 0, 2, /*AddX=*/false, Shift);
+ case 9:
+ return getShlAddShlAdd(N, DAG, 0, 3, /*AddX=*/false, Shift);
+ // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
case 5 * 3:
- return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false);
+ return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false, Shift);
case 9 * 3:
- return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false);
+ return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false, Shift);
case 5 * 5:
- return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false);
+ return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false, Shift);
case 9 * 5:
- return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false);
+ return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false, Shift);
case 9 * 9:
- return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false);
+ return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false, Shift);
default:
break;
}
@@ -16529,7 +16549,7 @@ static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
if (int ShY = isShifted359(MulAmt - 1, ShX)) {
assert(ShX != 0 && "MulAmt=4,6,10 handled before");
if (ShX <= 3)
- return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true);
+ return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true, Shift);
}
return SDValue();
}
@@ -16569,42 +16589,18 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
// real regressions, and no other target properly freezes X in these cases
// either.
if (Subtarget.hasShlAdd(3)) {
- SDValue X = N->getOperand(0);
- int Shift;
- if (int ShXAmount = isShifted359(MulAmt, Shift)) {
- // 3/5/9 * 2^N -> shl (shXadd X, X), N
- SDLoc DL(N);
- // Put the shift first if we can fold a zext into the shift forming
- // a slli.uw.
- if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
- X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
- SDValue Shl =
- DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
- DAG.getTargetConstant(ShXAmount, DL, VT), Shl);
- }
- // Otherwise, put the shl second so that it can fold with following
- // instructions (e.g. sext or add).
- SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getTargetConstant(ShXAmount, DL, VT), X);
- return DAG.getNode(ISD::SHL, DL, VT, Mul359,
- DAG.getConstant(Shift, DL, VT));
- }
-
+ // 3/5/9 * 2^N -> (shl (shXadd X, X), N)
// 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
// of 25 which happen to be quite common.
// (2/4/8 * 3/5/9 + 1) * 2^N
- Shift = llvm::countr_zero(MulAmt);
- if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
- if (Shift == 0)
- return V;
- SDLoc DL(N);
- return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
- }
+ unsigned Shift = llvm::countr_zero(MulAmt);
+ if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift, Shift))
+ return V;
// If this is a power of 2 + 2/4/8, we can use a shift followed by a single
// shXadd. First check if this is a sum of two powers of 2 because that's
// easy. Then count how many zeros are up to the first bit.
+ SDValue X = N->getOperand(0);
if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
SDLoc DL(N);
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index d4b228828c04d..e56c7b41d43ce 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1459,6 +1459,34 @@ define i64 @mul288(i64 %a) {
ret i64 %c
}
+define i64 @zext_mul44(i32 signext %a) {
+; RV64I-LABEL: zext_mul44:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 11
+; RV64I-NEXT: slli a1, a1, 34
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: mulhu a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: zext_mul44:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: slli.uw a0, a0, 2
+; RV64ZBA-NEXT: sh2add a1, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a1, a0
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: zext_mul44:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a0, a0, 32
+; RV64XANDESPERF-NEXT: srli a0, a0, 30
+; RV64XANDESPERF-NEXT: nds.lea.w a1, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a1
+; RV64XANDESPERF-NEXT: ret
+ %b = zext i32 %a to i64
+ %c = mul i64 %b, 44
+ ret i64 %c
+}
+
define i64 @zext_mul68(i32 signext %a) {
; RV64I-LABEL: zext_mul68:
; RV64I: # %bb.0:
@@ -1511,6 +1539,34 @@ define i64 @zext_mul96(i32 signext %a) {
ret i64 %c
}
+define i64 @zext_mul100(i32 signext %a) {
+; RV64I-LABEL: zext_mul100:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a1, 25
+; RV64I-NEXT: slli a1, a1, 34
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: mulhu a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: zext_mul100:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: slli.uw a0, a0, 2
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a0, a0
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: zext_mul100:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: slli a0, a0, 32
+; RV64XANDESPERF-NEXT: srli a0, a0, 30
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a0
+; RV64XANDESPERF-NEXT: ret
+ %b = zext i32 %a to i64
+ %c = mul i64 %b, 100
+ ret i64 %c
+}
+
define i64 @zext_mul160(i32 signext %a) {
; RV64I-LABEL: zext_mul160:
; RV64I: # %bb.0: