[llvm] 01bfe97 - [ISEL] Canonicalize STEP_VECTOR to LHS if RHS is a splat.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 3 01:32:22 PST 2022
Author: Sander de Smalen
Date: 2022-02-03T09:31:46Z
New Revision: 01bfe9729a9812c8dcdb8f239bce23bb10637ea8
URL: https://github.com/llvm/llvm-project/commit/01bfe9729a9812c8dcdb8f239bce23bb10637ea8
DIFF: https://github.com/llvm/llvm-project/commit/01bfe9729a9812c8dcdb8f239bce23bb10637ea8.diff
LOG: [ISEL] Canonicalize STEP_VECTOR to LHS if RHS is a splat.
This helps recognise cases where we're trying to match STEP_VECTOR
patterns to INDEX instructions that take a GPR for the Start/Step.

STEP_VECTOR is canonicalised to the LHS because it already ends up
on the LHS when the RHS is a constant splat vector (constants are
canonicalised to the RHS), so this makes the non-constant splat
case consistent with the constant one.
Reviewed By: paulwalker-arm
Differential Revision: https://reviews.llvm.org/D118459
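
As an illustrative sketch (not part of the commit; the function name and
exact IR are hypothetical, modelled on the sve-stepvector.ll tests below),
both operand orders of a splat-plus-stepvector add now reach instruction
selection as add(step_vector, splat), so a single INDEX instruction can be
matched:

define <vscale x 2 x i64> @add_splat_stepvector(i64 %start) {
entry:
  ; Splat %start into all lanes.
  %ins = insertelement <vscale x 2 x i64> poison, i64 %start, i32 0
  %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
  %step = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
  ; splat + step: previously left as add(splat, step_vector); now
  ; canonicalised to add(step_vector, splat) during DAG construction.
  %add = add <vscale x 2 x i64> %splat, %step
  ret <vscale x 2 x i64> %add
}

declare <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()

Compiled with -mattr=+sve this should now select to a single
"index z0.d, x0, #1" rather than an explicit mov/add pair, in the same way
as the mul_add_stepvector_nxv2i64_commutative test update below.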
Added:
Modified:
llvm/include/llvm/CodeGen/SelectionDAG.h
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/AArch64/active_lane_mask.ll
llvm/test/CodeGen/AArch64/sve-stepvector.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index e31719bcff0b..6408bcffd232 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1412,6 +1412,11 @@ class SelectionDAG {
   /// Return an AssertAlignSDNode.
   SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A);
 
+  /// Swap N1 and N2 if Opcode is a commutative binary opcode
+  /// and the canonical form expects the opposite order.
+  void canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1,
+                                    SDValue &N2) const;
+
   /// Return the specified value casted to
   /// the target's desired shift amount type.
   SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d5998d166d25..df09368f6a8a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5629,20 +5629,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   return getNode(Opcode, DL, VT, N1, N2, Flags);
 }
 
+void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1,
+                                                SDValue &N2) const {
+  if (!TLI->isCommutativeBinOp(Opcode))
+    return;
+
+  // Canonicalize:
+  //   binop(const, nonconst) -> binop(nonconst, const)
+  bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
+  bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
+  bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+  bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
+  if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
+    std::swap(N1, N2);
+
+  // Canonicalize:
+  //   binop(splat(x), step_vector) -> binop(step_vector, splat(x))
+  else if (N1.getOpcode() == ISD::SPLAT_VECTOR &&
+           N2.getOpcode() == ISD::STEP_VECTOR)
+    std::swap(N1, N2);
+}
+
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                               SDValue N1, SDValue N2, const SDNodeFlags Flags) {
   assert(N1.getOpcode() != ISD::DELETED_NODE &&
          N2.getOpcode() != ISD::DELETED_NODE &&
          "Operand is DELETED_NODE!");
-  // Canonicalize constant to RHS if commutative.
-  if (TLI->isCommutativeBinOp(Opcode)) {
-    bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
-    bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
-    bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
-    bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
-    if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
-      std::swap(N1, N2);
-  }
+
+  canonicalizeCommutativeBinop(Opcode, N1, N2);
 
   auto *N1C = dyn_cast<ConstantSDNode>(N1);
   auto *N2C = dyn_cast<ConstantSDNode>(N2);
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index b5eb2d11c707..45f030dc4714 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -80,7 +80,7 @@ define <vscale x 16 x i1> @lane_mask_nxv16i1_i8(i8 %index, i8 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: mov z1.b, w0
-; CHECK-NEXT: uqadd z0.b, z1.b, z0.b
+; CHECK-NEXT: uqadd z0.b, z0.b, z1.b
; CHECK-NEXT: mov z1.b, w1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
@@ -96,7 +96,7 @@ define <vscale x 8 x i1> @lane_mask_nxv8i1_i8(i8 %index, i8 %TC) {
; CHECK-NEXT: mov z1.h, w0
; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: and z1.h, z1.h, #0xff
-; CHECK-NEXT: add z0.h, z1.h, z0.h
+; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: mov z1.h, w1
; CHECK-NEXT: umin z0.h, z0.h, #255
; CHECK-NEXT: and z1.h, z1.h, #0xff
@@ -115,7 +115,7 @@ define <vscale x 4 x i1> @lane_mask_nxv4i1_i8(i8 %index, i8 %TC) {
; CHECK-NEXT: mov z1.s, w0
; CHECK-NEXT: and z0.s, z0.s, #0xff
; CHECK-NEXT: and z1.s, z1.s, #0xff
-; CHECK-NEXT: add z0.s, z1.s, z0.s
+; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: mov z1.s, w1
; CHECK-NEXT: umin z0.s, z0.s, #255
; CHECK-NEXT: and z1.s, z1.s, #0xff
@@ -135,7 +135,7 @@ define <vscale x 2 x i1> @lane_mask_nxv2i1_i8(i8 %index, i8 %TC) {
; CHECK-NEXT: mov z1.d, x0
; CHECK-NEXT: and z0.d, z0.d, #0xff
; CHECK-NEXT: and z1.d, z1.d, #0xff
-; CHECK-NEXT: add z0.d, z1.d, z0.d
+; CHECK-NEXT: add z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: mov z2.d, x1
; CHECK-NEXT: umin z0.d, z0.d, #255
@@ -167,29 +167,29 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i32(i32 %index, i32 %TC) {
; CHECK-NEXT: mov z2.d, z0.d
; CHECK-NEXT: mov z4.s, w1
; CHECK-NEXT: incw z1.s
-; CHECK-NEXT: uqadd z5.s, z3.s, z0.s
+; CHECK-NEXT: uqadd z5.s, z0.s, z3.s
; CHECK-NEXT: incw z2.s, all, mul #2
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: cmphi p1.s, p0/z, z4.s, z5.s
-; CHECK-NEXT: uqadd z5.s, z3.s, z1.s
+; CHECK-NEXT: uqadd z5.s, z1.s, z3.s
; CHECK-NEXT: cmphi p2.s, p0/z, z4.s, z5.s
-; CHECK-NEXT: uqadd z5.s, z3.s, z2.s
+; CHECK-NEXT: uqadd z5.s, z2.s, z3.s
; CHECK-NEXT: incw z6.s, all, mul #2
; CHECK-NEXT: incw z0.s, all, mul #4
; CHECK-NEXT: cmphi p3.s, p0/z, z4.s, z5.s
-; CHECK-NEXT: uqadd z5.s, z3.s, z6.s
+; CHECK-NEXT: uqadd z5.s, z6.s, z3.s
; CHECK-NEXT: incw z1.s, all, mul #4
; CHECK-NEXT: cmphi p4.s, p0/z, z4.s, z5.s
-; CHECK-NEXT: uqadd z0.s, z3.s, z0.s
-; CHECK-NEXT: uqadd z1.s, z3.s, z1.s
+; CHECK-NEXT: uqadd z0.s, z0.s, z3.s
+; CHECK-NEXT: uqadd z1.s, z1.s, z3.s
; CHECK-NEXT: incw z2.s, all, mul #4
; CHECK-NEXT: incw z6.s, all, mul #4
; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h
; CHECK-NEXT: uzp1 p2.h, p3.h, p4.h
; CHECK-NEXT: cmphi p3.s, p0/z, z4.s, z0.s
; CHECK-NEXT: cmphi p4.s, p0/z, z4.s, z1.s
-; CHECK-NEXT: uqadd z0.s, z3.s, z2.s
-; CHECK-NEXT: uqadd z1.s, z3.s, z6.s
+; CHECK-NEXT: uqadd z0.s, z2.s, z3.s
+; CHECK-NEXT: uqadd z1.s, z6.s, z3.s
; CHECK-NEXT: cmphi p5.s, p0/z, z4.s, z0.s
; CHECK-NEXT: cmphi p0.s, p0/z, z4.s, z1.s
; CHECK-NEXT: uzp1 p3.h, p3.h, p4.h
@@ -223,63 +223,63 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i64(i64 %index, i64 %TC) {
; CHECK-NEXT: mov z2.d, z0.d
; CHECK-NEXT: mov z4.d, x1
; CHECK-NEXT: incd z1.d
-; CHECK-NEXT: uqadd z5.d, z3.d, z0.d
-; CHECK-NEXT: uqadd z6.d, z3.d, z1.d
+; CHECK-NEXT: uqadd z5.d, z0.d, z3.d
+; CHECK-NEXT: uqadd z6.d, z1.d, z3.d
; CHECK-NEXT: cmphi p1.d, p0/z, z4.d, z5.d
; CHECK-NEXT: mov z5.d, z1.d
; CHECK-NEXT: incd z2.d, all, mul #2
; CHECK-NEXT: cmphi p2.d, p0/z, z4.d, z6.d
-; CHECK-NEXT: uqadd z6.d, z3.d, z2.d
+; CHECK-NEXT: uqadd z6.d, z2.d, z3.d
; CHECK-NEXT: mov z7.d, z0.d
; CHECK-NEXT: incd z5.d, all, mul #2
; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s
; CHECK-NEXT: cmphi p2.d, p0/z, z4.d, z6.d
-; CHECK-NEXT: uqadd z6.d, z3.d, z5.d
+; CHECK-NEXT: uqadd z6.d, z5.d, z3.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: incd z7.d, all, mul #4
; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z6.d
-; CHECK-NEXT: uqadd z6.d, z3.d, z7.d
+; CHECK-NEXT: uqadd z6.d, z7.d, z3.d
; CHECK-NEXT: mov z25.d, z2.d
; CHECK-NEXT: incd z24.d, all, mul #4
; CHECK-NEXT: mov z26.d, z5.d
; CHECK-NEXT: cmphi p4.d, p0/z, z4.d, z6.d
-; CHECK-NEXT: uqadd z6.d, z3.d, z24.d
+; CHECK-NEXT: uqadd z6.d, z24.d, z3.d
; CHECK-NEXT: incd z25.d, all, mul #4
; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z6.d
-; CHECK-NEXT: uqadd z6.d, z3.d, z25.d
+; CHECK-NEXT: uqadd z6.d, z25.d, z3.d
; CHECK-NEXT: incd z26.d, all, mul #4
; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z6.d
-; CHECK-NEXT: uqadd z6.d, z3.d, z26.d
+; CHECK-NEXT: uqadd z6.d, z26.d, z3.d
; CHECK-NEXT: uzp1 p2.s, p2.s, p3.s
; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z6.d
; CHECK-NEXT: incd z0.d, all, mul #8
; CHECK-NEXT: incd z1.d, all, mul #8
; CHECK-NEXT: uzp1 p4.s, p4.s, p5.s
; CHECK-NEXT: uzp1 p3.s, p6.s, p3.s
-; CHECK-NEXT: uqadd z0.d, z3.d, z0.d
-; CHECK-NEXT: uqadd z1.d, z3.d, z1.d
+; CHECK-NEXT: uqadd z0.d, z0.d, z3.d
+; CHECK-NEXT: uqadd z1.d, z1.d, z3.d
; CHECK-NEXT: incd z2.d, all, mul #8
; CHECK-NEXT: incd z5.d, all, mul #8
; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h
; CHECK-NEXT: uzp1 p2.h, p4.h, p3.h
; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z0.d
; CHECK-NEXT: cmphi p4.d, p0/z, z4.d, z1.d
-; CHECK-NEXT: uqadd z0.d, z3.d, z2.d
-; CHECK-NEXT: uqadd z1.d, z3.d, z5.d
+; CHECK-NEXT: uqadd z0.d, z2.d, z3.d
+; CHECK-NEXT: uqadd z1.d, z5.d, z3.d
; CHECK-NEXT: incd z7.d, all, mul #8
; CHECK-NEXT: incd z24.d, all, mul #8
; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z0.d
; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z1.d
-; CHECK-NEXT: uqadd z0.d, z3.d, z7.d
-; CHECK-NEXT: uqadd z1.d, z3.d, z24.d
+; CHECK-NEXT: uqadd z0.d, z7.d, z3.d
+; CHECK-NEXT: uqadd z1.d, z24.d, z3.d
; CHECK-NEXT: incd z25.d, all, mul #8
; CHECK-NEXT: incd z26.d, all, mul #8
; CHECK-NEXT: uzp1 p3.s, p3.s, p4.s
; CHECK-NEXT: uzp1 p4.s, p5.s, p6.s
; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z0.d
; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z1.d
-; CHECK-NEXT: uqadd z0.d, z3.d, z25.d
-; CHECK-NEXT: uqadd z1.d, z3.d, z26.d
+; CHECK-NEXT: uqadd z0.d, z25.d, z3.d
+; CHECK-NEXT: uqadd z1.d, z26.d, z3.d
; CHECK-NEXT: cmphi p7.d, p0/z, z4.d, z0.d
; CHECK-NEXT: cmphi p0.d, p0/z, z4.d, z1.d
; CHECK-NEXT: uzp1 p5.s, p5.s, p6.s
@@ -308,9 +308,9 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i8(i8 %index, i8 %TC) {
; CHECK-NEXT: mov z2.b, w0
; CHECK-NEXT: add z1.b, z0.b, z1.b
; CHECK-NEXT: mov z3.b, w1
-; CHECK-NEXT: uqadd z0.b, z2.b, z0.b
+; CHECK-NEXT: uqadd z0.b, z0.b, z2.b
; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: uqadd z1.b, z2.b, z1.b
+; CHECK-NEXT: uqadd z1.b, z1.b, z2.b
; CHECK-NEXT: cmphi p0.b, p1/z, z3.b, z0.b
; CHECK-NEXT: cmphi p1.b, p1/z, z3.b, z1.b
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-stepvector.ll b/llvm/test/CodeGen/AArch64/sve-stepvector.ll
index 20c0ee35d8cd..61aafb674999 100644
--- a/llvm/test/CodeGen/AArch64/sve-stepvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stepvector.ll
@@ -218,7 +218,7 @@ define <vscale x 2 x i64> @multiple_use_stepvector_nxv2i64_1(i64 %data) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: mov z1.d, x0
-; CHECK-NEXT: add z1.d, z1.d, z0.d
+; CHECK-NEXT: add z1.d, z0.d, z1.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
@@ -292,11 +292,7 @@ entry:
define <vscale x 2 x i64> @mul_add_stepvector_nxv2i64_commutative(i64 %x, i64 %y) {
; CHECK-LABEL: mul_add_stepvector_nxv2i64_commutative:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: index z1.d, #0, #1
-; CHECK-NEXT: mov z2.d, x1
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z0.d, x0
-; CHECK-NEXT: mla z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT: index z0.d, x0, x1
; CHECK-NEXT: ret
entry:
%0 = insertelement <vscale x 2 x i64> poison, i64 %y, i32 0