[llvm] 325a308 - [AArch64] Adjust operand sequence for Add+Sub to combine more inline shift
Author: chenglin.bi
Date: 2022-10-31T18:57:41+08:00
New Revision: 325a3083b5b24fd0635fb5a49566dd573afc55ef
URL: https://github.com/llvm/llvm-project/commit/325a3083b5b24fd0635fb5a49566dd573afc55ef
DIFF: https://github.com/llvm/llvm-project/commit/325a3083b5b24fd0635fb5a49566dd573afc55ef.diff
LOG: [AArch64] Adjust operand sequence for Add+Sub to combine more inline shift
((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
Fix the AArch64 part of #55714
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D136158
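To illustrate the intent (a hedged sketch, not part of the patch): the AArch64 add instruction can fold a constant left shift into its second source register (add w0, w8, w0, lsl #3), but the original operand order keeps the shift on the inner sub, so it has to be materialized with a separate lsl. A C-level sketch of the pattern, using a hypothetical function f; the before/after sequences mirror the commute_subop0 test update below:

// Hypothetical example; the instruction sequences correspond to the
// commute_subop0 test change in this commit.
int f(int x, int y, int z) {
  return ((x << 3) - y) + z;   // ((X << C) - Y) + Z
  // before: lsl w8, w0, #3 ; sub w8, w8, w1 ; add w0, w8, w2
  // after:  sub w8, w2, w1 ; add w0, w8, w0, lsl #3
}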
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/addsub.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a97a24c435510..afbbe1fc0ddbe 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16854,6 +16854,44 @@ static SDValue performBuildVectorCombine(SDNode *N,
return SDValue();
}
+// ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
+static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z,
+ SelectionDAG &DAG) {
+ auto IsOneUseShiftC = [&](SDValue Shift) {
+ if (!Shift.hasOneUse())
+ return false;
+
+ // TODO: support SRL and SRA also
+ if (Shift.getOpcode() != ISD::SHL)
+ return false;
+
+ if (!isa<ConstantSDNode>(Shift.getOperand(1)))
+ return false;
+ return true;
+ };
+
+ // DAGCombiner will undo this combination when Z is a constant, which
+ // would cause an infinite loop, so don't perform it in that case.
+ // Likewise, if Z is a one-use shift by a constant, skip the optimization;
+ // it would also end up in an infinite combine loop.
+ if (isa<ConstantSDNode>(Z) || IsOneUseShiftC(Z))
+ return SDValue();
+
+ if (SUB.getOpcode() != ISD::SUB || !SUB.hasOneUse())
+ return SDValue();
+
+ SDValue Shift = SUB.getOperand(0);
+ if (!IsOneUseShiftC(Shift))
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue Y = SUB.getOperand(1);
+ SDValue NewSub = DAG.getNode(ISD::SUB, DL, VT, Z, Y);
+ return DAG.getNode(ISD::ADD, DL, VT, NewSub, Shift);
+}
+
static SDValue performAddCombineForShiftedOperands(SDNode *N,
SelectionDAG &DAG) {
// NOTE: Swapping LHS and RHS is not done for SUB, since SUB is not
@@ -16871,6 +16909,11 @@ static SDValue performAddCombineForShiftedOperands(SDNode *N,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
+ if (SDValue Val = performAddCombineSubShift(N, LHS, RHS, DAG))
+ return Val;
+ if (SDValue Val = performAddCombineSubShift(N, RHS, LHS, DAG))
+ return Val;
+
uint64_t LHSImm = 0, RHSImm = 0;
// If both operand are shifted by imm and shift amount is not greater than 4
// for one operand, swap LHS and RHS to put operand with smaller shift amount
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index 663372794e76c..33302f09ccdf7 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -694,12 +694,12 @@ if.end: ; preds = %if.then, %lor.lhs.f
ret i32 undef
}
+; ((X << C) - Y) + Z --> (Z - Y) + (X << C)
define i32 @commute_subop0(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: commute_subop0:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, #3
-; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: add w0, w8, w2
+; CHECK-NEXT: sub w8, w2, w1
+; CHECK-NEXT: add w0, w8, w0, lsl #3
; CHECK-NEXT: ret
%shl = shl i32 %x, 3
%sub = sub i32 %shl, %y
@@ -707,12 +707,40 @@ define i32 @commute_subop0(i32 %x, i32 %y, i32 %z) {
ret i32 %add
}
+; ((X >> C) - Y) + Z with a logical shift right; not combined yet (TODO: support SRL)
+define i32 @commute_subop0_lshr(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: commute_subop0_lshr:
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsr w8, w0, #3
+; CHECK-NEXT: sub w8, w8, w1
+; CHECK-NEXT: add w0, w8, w2
+; CHECK-NEXT: ret
+ %lshr = lshr i32 %x, 3
+ %sub = sub i32 %lshr, %y
+ %add = add i32 %sub, %z
+ ret i32 %add
+}
+
+; ((X >> C) - Y) + Z with an arithmetic shift right; not combined yet (TODO: support SRA)
+define i32 @commute_subop0_ashr(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: commute_subop0_ashr:
+; CHECK: // %bb.0:
+; CHECK-NEXT: asr w8, w0, #3
+; CHECK-NEXT: sub w8, w8, w1
+; CHECK-NEXT: add w0, w8, w2
+; CHECK-NEXT: ret
+ %ashr = ashr i32 %x, 3
+ %sub = sub i32 %ashr, %y
+ %add = add i32 %sub, %z
+ ret i32 %add
+}
+
+; Z + ((X << C) - Y) --> (Z - Y) + (X << C)
define i32 @commute_subop0_cadd(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: commute_subop0_cadd:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, #3
-; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: add w0, w2, w8
+; CHECK-NEXT: sub w8, w2, w1
+; CHECK-NEXT: add w0, w8, w0, lsl #3
; CHECK-NEXT: ret
%shl = shl i32 %x, 3
%sub = sub i32 %shl, %y
@@ -720,14 +748,61 @@ define i32 @commute_subop0_cadd(i32 %x, i32 %y, i32 %z) {
ret i32 %add
}
+; mul by 7 is lowered to (X << 3) - X; then Y + ((X << C) - X) --> (Y - X) + (X << C)
define i32 @commute_subop0_mul(i32 %x, i32 %y) {
; CHECK-LABEL: commute_subop0_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w0, #3
-; CHECK-NEXT: sub w8, w8, w0
-; CHECK-NEXT: add w0, w8, w1
+; CHECK-NEXT: sub w8, w1, w0
+; CHECK-NEXT: add w0, w8, w0, lsl #3
; CHECK-NEXT: ret
%mul = mul i32 %x, 7
%add = add i32 %mul, %y
ret i32 %add
}
+
+; negative case for ((X << C) - Y) + Z --> (Z - Y) + (X << C)
+; Z must not be a constant, or DAGCombiner would revert the fold and loop forever
+define i32 @commute_subop0_zconst(i32 %x, i32 %y) {
+; CHECK-LABEL: commute_subop0_zconst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsl w8, w0, #3
+; CHECK-NEXT: sub w8, w8, w1
+; CHECK-NEXT: add w0, w8, #1
+; CHECK-NEXT: ret
+ %shl = shl i32 %x, 3
+ %sub = sub i32 %shl, %y
+ %add = add i32 %sub, 1
+ ret i32 %add
+}
+
+; negative case for ((X << C) - Y) + Z --> (Z - Y) + (X << C)
+; Z must not be a one-use shift by a constant either, or the combine would loop on itself
+define i32 @commute_subop0_zshiftc_oneuse(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: commute_subop0_zshiftc_oneuse:
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsl w8, w0, #3
+; CHECK-NEXT: sub w8, w8, w1
+; CHECK-NEXT: add w0, w8, w2, lsl #2
+; CHECK-NEXT: ret
+ %xshl = shl i32 %x, 3
+ %sub = sub i32 %xshl, %y
+ %zshl = shl i32 %z, 2
+ %add = add i32 %sub, %zshl
+ ret i32 %add
+}
+
+define i32 @commute_subop0_zshiftc(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: commute_subop0_zshiftc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsl w8, w2, #2
+; CHECK-NEXT: sub w9, w8, w1
+; CHECK-NEXT: add w9, w9, w0, lsl #3
+; CHECK-NEXT: eor w0, w8, w9
+; CHECK-NEXT: ret
+ %xshl = shl i32 %x, 3
+ %sub = sub i32 %xshl, %y
+ %zshl = shl i32 %z, 2
+ %add = add i32 %sub, %zshl
+ %r = xor i32 %zshl, %add
+ ret i32 %r
+}
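As a closing note on the two guards in performAddCombineSubShift: the negative tests above correspond to source patterns like the following sketch (hypothetical function names, not taken from the patch). The combine deliberately leaves them alone because, per the comment in the patch, DAGCombiner would revert the rewrite and the combine would loop forever.

// Hypothetical C-level counterparts of the negative tests in addsub.ll.
int zconst(int x, int y) {
  return ((x << 3) - y) + 1;          // Z is a constant: combine is skipped
}
int zshiftc_oneuse(int x, int y, int z) {
  return ((x << 3) - y) + (z << 2);   // Z is a one-use shl by constant: combine is skipped
}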