[llvm] [RISCV] Add combine for shadd family of instructions. (PR #130829)
Stefan Pintilie via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 11 13:46:52 PDT 2025
https://github.com/stefanp-synopsys created https://github.com/llvm/llvm-project/pull/130829
For example for the following situation:
%6:gpr = SLLI %2:gpr, 2
%7:gpr = ADDI killed %6:gpr, 24
%8:gpr = ADD %0:gpr, %7:gpr
If we swap the two add instructions, we can merge the shift and the add. The final code will look something like this:
%7 = SH2ADD %0, %2
%8 = ADDI %7, 24
>From df5bcbf5db3df3bf02869c0fab5f7ba541c7097d Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <pintilie at synopsys.com>
Date: Tue, 11 Mar 2025 13:41:31 -0700
Subject: [PATCH] [RISCV] Add combine for shadd family of instructions.
For example for the following situation:
%6:gpr = SLLI %2:gpr, 2
%7:gpr = ADDI killed %6:gpr, 24
%8:gpr = ADD %0:gpr, %7:gpr
If we swap the two add instructions, we can merge the shift and the add.
The final code will look something like this:
%7 = SH2ADD %0, %2
%8 = ADDI %7, 24
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 92 +++++++++++++-
.../CodeGen/RISCV/reassoc-shl-addi-add.ll | 113 ++++++++++++++++++
2 files changed, 204 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/RISCV/reassoc-shl-addi-add.ll
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 27a4bbce1f5fc..6334eab8c96ec 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -79,6 +79,12 @@ static cl::opt<int>
"use for creating a floating-point immediate value"),
cl::init(2));
+// Hidden command-line switch, on by default, that gates the
+// (add (add (shl x, c0), c1), y) reassociation done by combineShlAddIAdd.
+static cl::opt<bool>
+ ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
+ cl::desc("Swap add and addi in cases where the add may "
+ "be combined with a shift"),
+ cl::init(true));
+
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -14306,6 +14312,87 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
}
+// Return true if AddI looks like (add (shl x, c0), c1), where c1 is a
+// constant and c0 is a constant equal to 1, 2 or 3, and the node has no more
+// than two uses. The opcode of AddI itself is not inspected here.
+static bool checkAddiForShift(SDValue AddI) {
+  // Testing suggests that performance degrades once the ADDI has more than
+  // two uses, so leave those alone.
+  if (AddI->use_size() > 2)
+    return false;
+
+  // The right-hand operand has to be an immediate.
+  if (!isa<ConstantSDNode>(AddI->getOperand(1)))
+    return false;
+
+  // The left-hand operand has to be a shift left by a constant amount.
+  SDValue Shl = AddI->getOperand(0);
+  if (Shl->getOpcode() != ISD::SHL)
+    return false;
+  auto *ShAmtNode = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
+  if (!ShAmtNode)
+    return false;
+
+  // Only shift amounts with a matching SH1ADD/SH2ADD/SH3ADD are useful.
+  int64_t ShAmt = ShAmtNode->getSExtValue();
+  return ShAmt >= 1 && ShAmt <= 3;
+}
+
+// Optimize (add (add (shl x, c0), c1), y) ->
+//          (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3].
+//
+// N is expected to be the outer ISD::ADD (this is called from
+// performADDCombine). Returns the replacement value, or an empty SDValue when
+// the pattern does not match or the combine is disabled.
+static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
+                                 const RISCVSubtarget &Subtarget) {
+  // Respect the reassoc-shl-addi-add switch; SH*ADD needs the zba extension.
+  if (!ReassocShlAddiAdd || !Subtarget.hasStdExtZba())
+    return SDValue();
+
+  // SH*ADD works on XLEN-wide scalars, so require exactly XLenVT. This also
+  // rejects vector types and anything wider than a register.
+  EVT VT = N->getValueType(0);
+  if (VT != Subtarget.getXLenVT())
+    return SDValue();
+
+  // Looking for a reg-reg add and not an addi.
+  if (isa<ConstantSDNode>(N->getOperand(1)))
+    return SDValue();
+
+  // Either operand may be the (add (shl x, c0), c1) half of the pattern.
+  // Operand 0 is preferred when both qualify.
+  auto IsShiftedAddI = [](SDValue V) {
+    return V->getOpcode() == ISD::ADD && checkAddiForShift(V);
+  };
+  SDValue AddI = N->getOperand(0);
+  SDValue Other = N->getOperand(1);
+  if (!IsShiftedAddI(AddI)) {
+    AddI = N->getOperand(1);
+    Other = N->getOperand(0);
+    if (!IsShiftedAddI(AddI))
+      return SDValue();
+  }
+
+  // checkAddiForShift has already established that both of these operands
+  // are constants, so cast<> (which asserts) is the right tool, not dyn_cast<>.
+  int64_t AddImm = cast<ConstantSDNode>(AddI->getOperand(1))->getSExtValue();
+  SDValue Shl = AddI->getOperand(0);
+  int64_t ShAmt = cast<ConstantSDNode>(Shl->getOperand(1))->getSExtValue();
+
+  // Build (add (shl_add x, c0, y), c1). The addend may be negative, so it is
+  // materialized as a signed constant.
+  SDLoc DL(N);
+  SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl->getOperand(0),
+                              DAG.getConstant(ShAmt, DL, VT), Other);
+  return DAG.getNode(ISD::ADD, DL, VT, SHADD,
+                     DAG.getSignedConstant(AddImm, DL, VT));
+}
+
// Combine a constant select operand into its use:
//
// (and (select cond, -1, c), x)
@@ -14547,9 +14634,12 @@ static SDValue performADDCombine(SDNode *N,
return V;
if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
return V;
- if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
+ if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
return V;
+ if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
+ return V;
+ }
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
diff --git a/llvm/test/CodeGen/RISCV/reassoc-shl-addi-add.ll b/llvm/test/CodeGen/RISCV/reassoc-shl-addi-add.ll
new file mode 100644
index 0000000000000..e1fa408706c4e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/reassoc-shl-addi-add.ll
@@ -0,0 +1,113 @@
+; RUN: llc -mtriple=riscv32-pc-unknown-gnu -mattr=+zba %s -o - | FileCheck %s
+
+declare dso_local i32 @callee1(i32 noundef) local_unnamed_addr
+declare dso_local i32 @callee2(i32 noundef, i32 noundef) local_unnamed_addr
+declare dso_local i32 @callee(i32 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr
+
+; CHECK-LABEL: t1:
+; CHECK: sh2add
+; CHECK: sh2add
+; CHECK: sh2add
+; CHECK: tail callee
+
+; %add (shl + 45) feeds two register adds (%add1, %add3), while %shl itself
+; is also added directly to %d.
+define dso_local void @t1(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) local_unnamed_addr #0 {
+entry:
+ %shl = shl i32 %a, 2
+ %add = add nsw i32 %shl, 45
+ %add1 = add nsw i32 %add, %b
+ %add3 = add nsw i32 %add, %c
+ %add5 = add nsw i32 %shl, %d
+ %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5)
+ ret void
+}
+
+; CHECK-LABEL: t2:
+; CHECK: slli
+; CHECK-NOT: sh2add
+; CHECK: tail callee
+
+; %add has three users: the register adds %add4 and %add7 plus a direct call
+; argument. %shl is passed to the call as well.
+define dso_local void @t2(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 {
+entry:
+ %shl = shl i32 %a, 2
+ %add = add nsw i32 %shl, 42
+ %add4 = add nsw i32 %add, %b
+ %add7 = add nsw i32 %add, %c
+ %call = tail call i32 @callee(i32 noundef %shl, i32 noundef %add, i32 noundef %add4, i32 noundef %add7)
+ ret void
+}
+
+; CHECK-LABEL: t3
+; CHECK: slli
+; CHECK-NOT: sh2add
+; CHECK: tail callee
+
+; %add feeds four register adds (%add1 through %add4).
+define dso_local void @t3(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e) local_unnamed_addr #0 {
+entry:
+ %shl = shl i32 %a, 2
+ %add = add nsw i32 %shl, 42
+ %add1 = add nsw i32 %add, %b
+ %add2 = add nsw i32 %add, %c
+ %add3 = add nsw i32 %add, %d
+ %add4 = add nsw i32 %add, %e
+ %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add2, i32 noundef %add3, i32 noundef %add4)
+ ret void
+}
+
+; CHECK-LABEL: t4
+; CHECK: sh2add
+; CHECK-NEXT: addi
+; CHECK-NEXT: tail callee1
+
+; Simplest case: %add has a single user, the register add %add1.
+define dso_local void @t4(i32 noundef %a, i32 noundef %b) local_unnamed_addr #0 {
+entry:
+ %shl = shl i32 %a, 2
+ %add = add nsw i32 %shl, 42
+ %add1 = add nsw i32 %add, %b
+ %call = tail call i32 @callee1(i32 noundef %add1)
+ ret void
+}
+
+; CHECK-LABEL: t5
+; CHECK: sh2add
+; CHECK: sh2add
+; CHECK: tail callee2
+
+; %add feeds exactly two register adds (%add1, %add2).
+define dso_local void @t5(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 {
+entry:
+ %shl = shl i32 %a, 2
+ %add = add nsw i32 %shl, 42
+ %add1 = add nsw i32 %add, %b
+ %add2 = add nsw i32 %add, %c
+ %call = tail call i32 @callee2(i32 noundef %add1, i32 noundef %add2)
+ ret void
+}
+
+; CHECK-LABEL: t6
+; CHECK-DAG: sh2add
+; CHECK-DAG: slli
+; CHECK: tail callee
+
+; %add has a single user (%add1), but %shl is also passed to the call three
+; times, so the shifted value is needed on its own.
+define dso_local void @t6(i32 noundef %a, i32 noundef %b) local_unnamed_addr #0 {
+entry:
+ %shl = shl i32 %a, 2
+ %add = add nsw i32 %shl, 42
+ %add1 = add nsw i32 %add, %b
+ %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %shl, i32 noundef %shl, i32 noundef %shl)
+ ret void
+}
+
+; CHECK-LABEL: t7
+; CHECK: slli
+; CHECK-NOT: sh2add
+; CHECK: tail callee
+
+; %add is used by the register add %add1 and is also passed to the call three
+; times.
+define dso_local void @t7(i32 noundef %a, i32 noundef %b) local_unnamed_addr #0 {
+entry:
+ %shl = shl i32 %a, 2
+ %add = add nsw i32 %shl, 42
+ %add1 = add nsw i32 %add, %b
+ %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add, i32 noundef %add, i32 noundef %add)
+ ret void
+}
+
+attributes #0 = { nounwind optsize }
More information about the llvm-commits
mailing list