[llvm] bf3784b - [AArch64] Canonicalize X*(Y+1) or X*(1-Y) to madd/msub
Andrew Wei via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 8 00:53:08 PST 2021
Author: Andrew Wei
Date: 2021-11-08T16:49:31+08:00
New Revision: bf3784b882c413108ed73f12f5f00b0146e7fb09
URL: https://github.com/llvm/llvm-project/commit/bf3784b882c413108ed73f12f5f00b0146e7fb09
DIFF: https://github.com/llvm/llvm-project/commit/bf3784b882c413108ed73f12f5f00b0146e7fb09.diff
LOG: [AArch64] Canonicalize X*(Y+1) or X*(1-Y) to madd/msub
Perform the rearrangement on add/sub and mul nodes so that the resulting sequence matches the madd/msub pattern.
Reviewed By: dmgreen, sdesmalen, david-arm
Differential Revision: https://reviews.llvm.org/D111862
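The fold follows from the identities X*(Y+1) = X*Y + X and X*(1-Y) = X - X*Y: after the rewrite the multiply feeds a final add/sub, which the MachineCombiner can then fuse into a single madd/msub. As a rough source-level illustration (hypothetical C++ counterparts of the new IR tests; the expected instructions are copied from the updated CHECK-ISEL lines below, and exact register allocation may differ):

// Hypothetical C++ versions of the new add1_mul_val1 / sub1_mul_val3 tests.
int add1_mul_val1(int a, int b) {
  return (a + 1) * b;        // expected: madd w0, w1, w0, w1
}
long long sub1_mul_val3(long long a, long long b) {
  return a * (1 - b);        // expected: msub x0, x0, x1, x0
}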
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/madd-combiner.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fef430fd5965..a076b055628b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13132,12 +13132,44 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ // Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y,
+ // and in the MachineCombiner pass, add+mul will be combined into madd.
+ // Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X.
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue MulOper;
+ unsigned AddSubOpc;
+
+ auto IsAddSubWith1 = [&](SDValue V) -> bool {
+ AddSubOpc = V->getOpcode();
+ if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
+ SDValue Opnd = V->getOperand(1);
+ MulOper = V->getOperand(0);
+ if (AddSubOpc == ISD::SUB)
+ std::swap(Opnd, MulOper);
+ if (auto C = dyn_cast<ConstantSDNode>(Opnd))
+ return C->isOne();
+ }
+ return false;
+ };
+
+ if (IsAddSubWith1(N0)) {
+ SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
+ return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
+ }
+
+ if (IsAddSubWith1(N1)) {
+ SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
+ return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
+ }
+
// The below optimizations require a constant RHS.
- if (!isa<ConstantSDNode>(N->getOperand(1)))
+ if (!isa<ConstantSDNode>(N1))
return SDValue();
- SDValue N0 = N->getOperand(0);
- ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *C = cast<ConstantSDNode>(N1);
const APInt &ConstValue = C->getAPIntValue();
// Allow the scaling to be folded into the `cnt` instruction by preventing
@@ -13178,7 +13210,7 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
// and shift+add+shift.
APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
- unsigned ShiftAmt, AddSubOpc;
+ unsigned ShiftAmt;
// Is the shifted value the LHS operand of the add/sub?
bool ShiftValUseIsN0 = true;
// Do we need to negate the result?
@@ -13215,8 +13247,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
DAG.getConstant(ShiftAmt, DL, MVT::i64));
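For readers skimming the patch, below is a small standalone sketch of just the matching step, using a toy Expr type instead of SDValue/SDNode (the names here are hypothetical, and the real lambda additionally requires hasOneUse on the add/sub node):

#include <cassert>
#include <memory>
#include <string>
#include <utility>

// Toy expression node; hypothetical, only for illustrating the match.
struct Expr {
  std::string Op;                  // "add", "sub", "const", "var"
  long long Imm;                   // payload when Op == "const"
  std::string Name;                // payload when Op == "var"
  std::shared_ptr<Expr> LHS, RHS;  // operands for binary ops
};

// Mirrors the IsAddSubWith1 lambda: does V look like (X + 1) or (1 - Y)?
// On success, MulOper is the non-constant operand and Opc the add/sub kind,
// matching how the DAG combine builds the replacement nodes.
static bool isAddSubWith1(const std::shared_ptr<Expr> &V,
                          std::shared_ptr<Expr> &MulOper, std::string &Opc) {
  if (!V || (V->Op != "add" && V->Op != "sub"))
    return false;
  std::shared_ptr<Expr> Opnd = V->RHS, Oper = V->LHS;
  if (V->Op == "sub")
    std::swap(Opnd, Oper);  // for sub, the constant must be the left operand
  if (!Opnd || Opnd->Op != "const" || Opnd->Imm != 1)
    return false;
  MulOper = Oper;
  Opc = V->Op;
  return true;
}

int main() {
  // Build (1 - y) and check that it matches as a sub-with-1, so a
  // surrounding mul X*(1-y) could be rewritten as X - X*y.
  auto One = std::make_shared<Expr>(Expr{"const", 1, "", nullptr, nullptr});
  auto Y = std::make_shared<Expr>(Expr{"var", 0, "y", nullptr, nullptr});
  auto Sub = std::make_shared<Expr>(Expr{"sub", 0, "", One, Y});
  std::shared_ptr<Expr> MulOper;
  std::string Opc;
  assert(isAddSubWith1(Sub, MulOper, Opc));
  assert(Opc == "sub" && MulOper->Name == "y");
  return 0;
}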
diff --git a/llvm/test/CodeGen/AArch64/madd-combiner.ll b/llvm/test/CodeGen/AArch64/madd-combiner.ll
index 8a3b5fdcee87..07fbcddb307e 100644
--- a/llvm/test/CodeGen/AArch64/madd-combiner.ll
+++ b/llvm/test/CodeGen/AArch64/madd-combiner.ll
@@ -1,20 +1,25 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ISEL
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FAST
; Test that we use the correct register class.
define i32 @mul_add_imm(i32 %a, i32 %b) {
-; CHECK-LABEL: mul_add_imm
-; CHECK: orr [[REG:w[0-9]+]], wzr, #0x4
-; CHECK-NEXT: madd {{w[0-9]+}}, w0, w1, [[REG]]
+; CHECK-LABEL: mul_add_imm:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: orr w8, wzr, #0x4
+; CHECK-NEXT: madd w0, w0, w1, w8
+; CHECK-NEXT: ret
%1 = mul i32 %a, %b
%2 = add i32 %1, 4
ret i32 %2
}
define i32 @mul_sub_imm1(i32 %a, i32 %b) {
-; CHECK-LABEL: mul_sub_imm1
-; CHECK: mov [[REG:w[0-9]+]], #4
-; CHECK-NEXT: msub {{w[0-9]+}}, w0, w1, [[REG]]
+; CHECK-LABEL: mul_sub_imm1:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: mov w8, #4
+; CHECK-NEXT: msub w0, w0, w1, w8
+; CHECK-NEXT: ret
%1 = mul i32 %a, %b
%2 = sub i32 4, %1
ret i32 %2
@@ -22,6 +27,29 @@ define i32 @mul_sub_imm1(i32 %a, i32 %b) {
; bugpoint reduced test case. This only tests that we pass the MI verifier.
define void @mul_add_imm2() {
+; CHECK-ISEL-LABEL: mul_add_imm2:
+; CHECK-ISEL: ; %bb.0: ; %entry
+; CHECK-ISEL-NEXT: mov w8, #1
+; CHECK-ISEL-NEXT: LBB2_1: ; %for.body8
+; CHECK-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-ISEL-NEXT: cbnz w8, LBB2_1
+; CHECK-ISEL-NEXT: ; %bb.2: ; %for.end20
+; CHECK-ISEL-NEXT: ret
+;
+; CHECK-FAST-LABEL: mul_add_imm2:
+; CHECK-FAST: ; %bb.0: ; %entry
+; CHECK-FAST-NEXT: mov x8, #-3
+; CHECK-FAST-NEXT: orr x9, xzr, #0xfffffffffffffffd
+; CHECK-FAST-NEXT: madd x8, x8, x8, x9
+; CHECK-FAST-NEXT: mov x9, #45968
+; CHECK-FAST-NEXT: movk x9, #48484, lsl #16
+; CHECK-FAST-NEXT: movk x9, #323, lsl #32
+; CHECK-FAST-NEXT: LBB2_1: ; %for.body8
+; CHECK-FAST-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-FAST-NEXT: cmp x8, x9
+; CHECK-FAST-NEXT: b.lt LBB2_1
+; CHECK-FAST-NEXT: ; %bb.2: ; %for.end20
+; CHECK-FAST-NEXT: ret
entry:
br label %for.body
for.body:
@@ -35,3 +63,141 @@ for.end20:
ret void
}
+define i32 @add1_mul_val1(i32 %a, i32 %b) {
+; CHECK-ISEL-LABEL: add1_mul_val1:
+; CHECK-ISEL: ; %bb.0:
+; CHECK-ISEL-NEXT: madd w0, w1, w0, w1
+; CHECK-ISEL-NEXT: ret
+;
+; CHECK-FAST-LABEL: add1_mul_val1:
+; CHECK-FAST: ; %bb.0:
+; CHECK-FAST-NEXT: add w8, w0, #1
+; CHECK-FAST-NEXT: mul w0, w8, w1
+; CHECK-FAST-NEXT: ret
+ %1 = add i32 %a, 1
+ %2 = mul i32 %1, %b
+ ret i32 %2
+}
+
+define i32 @add1_mul_val2(i32 %a, i32 %b) {
+; CHECK-ISEL-LABEL: add1_mul_val2:
+; CHECK-ISEL: ; %bb.0:
+; CHECK-ISEL-NEXT: madd w0, w0, w1, w0
+; CHECK-ISEL-NEXT: ret
+;
+; CHECK-FAST-LABEL: add1_mul_val2:
+; CHECK-FAST: ; %bb.0:
+; CHECK-FAST-NEXT: add w8, w1, #1
+; CHECK-FAST-NEXT: mul w0, w0, w8
+; CHECK-FAST-NEXT: ret
+ %1 = add i32 %b, 1
+ %2 = mul i32 %a, %1
+ ret i32 %2
+}
+
+define i64 @add1_mul_val3(i64 %a, i64 %b) {
+; CHECK-ISEL-LABEL: add1_mul_val3:
+; CHECK-ISEL: ; %bb.0:
+; CHECK-ISEL-NEXT: madd x0, x0, x1, x0
+; CHECK-ISEL-NEXT: ret
+;
+; CHECK-FAST-LABEL: add1_mul_val3:
+; CHECK-FAST: ; %bb.0:
+; CHECK-FAST-NEXT: add x8, x1, #1
+; CHECK-FAST-NEXT: mul x0, x0, x8
+; CHECK-FAST-NEXT: ret
+ %1 = add i64 %b, 1
+ %2 = mul i64 %a, %1
+ ret i64 %2
+}
+
+define i64 @add1_mul_val4(i64 %a, i64 %b, i64 %c) {
+; CHECK-ISEL-LABEL: add1_mul_val4:
+; CHECK-ISEL: ; %bb.0:
+; CHECK-ISEL-NEXT: add x8, x0, x2
+; CHECK-ISEL-NEXT: madd x0, x8, x1, x8
+; CHECK-ISEL-NEXT: ret
+;
+; CHECK-FAST-LABEL: add1_mul_val4:
+; CHECK-FAST: ; %bb.0:
+; CHECK-FAST-NEXT: add x8, x1, #1
+; CHECK-FAST-NEXT: add x9, x0, x2
+; CHECK-FAST-NEXT: mul x0, x9, x8
+; CHECK-FAST-NEXT: ret
+ %1 = add i64 %a, %c
+ %2 = add i64 %b, 1
+ %3 = mul i64 %1, %2
+ ret i64 %3
+}
+
+define i32 @sub1_mul_val1(i32 %a, i32 %b) {
+; CHECK-ISEL-LABEL: sub1_mul_val1:
+; CHECK-ISEL: ; %bb.0:
+; CHECK-ISEL-NEXT: msub w0, w1, w0, w1
+; CHECK-ISEL-NEXT: ret
+;
+; CHECK-FAST-LABEL: sub1_mul_val1:
+; CHECK-FAST: ; %bb.0:
+; CHECK-FAST-NEXT: mov w8, #1
+; CHECK-FAST-NEXT: sub w8, w8, w0
+; CHECK-FAST-NEXT: mul w0, w8, w1
+; CHECK-FAST-NEXT: ret
+ %1 = sub i32 1, %a
+ %2 = mul i32 %1, %b
+ ret i32 %2
+}
+
+define i32 @sub1_mul_val2(i32 %a, i32 %b) {
+; CHECK-ISEL-LABEL: sub1_mul_val2:
+; CHECK-ISEL: ; %bb.0:
+; CHECK-ISEL-NEXT: msub w0, w0, w1, w0
+; CHECK-ISEL-NEXT: ret
+;
+; CHECK-FAST-LABEL: sub1_mul_val2:
+; CHECK-FAST: ; %bb.0:
+; CHECK-FAST-NEXT: mov w8, #1
+; CHECK-FAST-NEXT: sub w8, w8, w1
+; CHECK-FAST-NEXT: mul w0, w0, w8
+; CHECK-FAST-NEXT: ret
+ %1 = sub i32 1, %b
+ %2 = mul i32 %a, %1
+ ret i32 %2
+}
+
+define i64 @sub1_mul_val3(i64 %a, i64 %b) {
+; CHECK-ISEL-LABEL: sub1_mul_val3:
+; CHECK-ISEL: ; %bb.0:
+; CHECK-ISEL-NEXT: msub x0, x0, x1, x0
+; CHECK-ISEL-NEXT: ret
+;
+; CHECK-FAST-LABEL: sub1_mul_val3:
+; CHECK-FAST: ; %bb.0:
+; CHECK-FAST-NEXT: mov x8, #1
+; CHECK-FAST-NEXT: sub x8, x8, x1
+; CHECK-FAST-NEXT: mul x0, x0, x8
+; CHECK-FAST-NEXT: ret
+ %1 = sub i64 1, %b
+ %2 = mul i64 %a, %1
+ ret i64 %2
+}
+
+define i64 @sub1_mul_val4(i64 %a, i64 %b) {
+; CHECK-ISEL-LABEL: sub1_mul_val4:
+; CHECK-ISEL: ; %bb.0:
+; CHECK-ISEL-NEXT: sub x8, x0, #1
+; CHECK-ISEL-NEXT: msub x0, x8, x1, x8
+; CHECK-ISEL-NEXT: ret
+;
+; CHECK-FAST-LABEL: sub1_mul_val4:
+; CHECK-FAST: ; %bb.0:
+; CHECK-FAST-NEXT: mov x8, #1
+; CHECK-FAST-NEXT: sub x9, x0, #1
+; CHECK-FAST-NEXT: sub x8, x8, x1
+; CHECK-FAST-NEXT: mul x0, x9, x8
+; CHECK-FAST-NEXT: ret
+ %1 = sub i64 %a, 1
+ %2 = sub i64 1, %b
+ %3 = mul i64 %1, %2
+ ret i64 %3
+}
+