[llvm] ada9ab6 - [AArch64] Adjust operand sequence for Add+Sub to combine more inline shift

Wed Oct 26 19:05:24 PDT 2022

Author: chenglin.bi
Date: 2022-10-27T10:05:15+08:00
New Revision: ada9ab610727917561370e976eaea26dbbc20cce

URL: https://github.com/llvm/llvm-project/commit/ada9ab610727917561370e976eaea26dbbc20cce
DIFF: https://github.com/llvm/llvm-project/commit/ada9ab610727917561370e976eaea26dbbc20cce.diff

LOG: [AArch64] Adjust operand sequence for Add+Sub to combine more inline shift

((X >> C) - Y) + Z --> (Z - Y) + (X >> C)

Fix AArch part: #55714

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D136158

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/addsub.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3194f54aab702..0aacb37c64c32 100644

--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16854,6 +16854,32 @@ static SDValue performBuildVectorCombine(SDNode *N,
   return SDValue();
 }
 
+// ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
+static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z,
+                                         SelectionDAG &DAG) {
+  // DAGCombiner will revert the combination when Z is constant cause
+  // dead loop. So don't enable the combination when Z is constant.
+  if (isa<ConstantSDNode>(Z))
+    return SDValue();
+
+  if (SUB.getOpcode() != ISD::SUB || !SUB.hasOneUse())
+    return SDValue();
+
+  SDValue SHL = SUB.getOperand(0);
+  if (SHL.getOpcode() != ISD::SHL || !SHL.hasOneUse())
+    return SDValue();
+
+  if (!isa<ConstantSDNode>(SHL.getOperand(1)))
+    return SDValue();
+
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  SDValue Y = SUB.getOperand(1);
+  SDValue NewSub = DAG.getNode(ISD::SUB, DL, VT, Z, Y);
+  return DAG.getNode(ISD::ADD, DL, VT, NewSub, SHL);
+}
+
 static SDValue performAddCombineForShiftedOperands(SDNode *N,
                                                    SelectionDAG &DAG) {
   // NOTE: Swapping LHS and RHS is not done for SUB, since SUB is not
@@ -16871,6 +16897,11 @@ static SDValue performAddCombineForShiftedOperands(SDNode *N,
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
 
+  if (SDValue Val = performAddCombineSubShift(N, LHS, RHS, DAG))
+    return Val;
+  if (SDValue Val = performAddCombineSubShift(N, RHS, LHS, DAG))
+    return Val;
+
   uint64_t LHSImm = 0, RHSImm = 0;
   // If both operand are shifted by imm and shift amount is not greater than 4
   // for one operand, swap LHS and RHS to put operand with smaller shift amount

diff  --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index 663372794e76c..18f6ce920115c 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -694,12 +694,12 @@ if.end:                                           ; preds = %if.then, %lor.lhs.f
   ret i32 undef
 }
 
+; ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
 define i32 @commute_subop0(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #3
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, lsl #3
 ; CHECK-NEXT:    ret
   %shl = shl i32 %x, 3
   %sub = sub i32 %shl, %y
@@ -707,12 +707,12 @@ define i32 @commute_subop0(i32 %x, i32 %y, i32 %z) {
   ret i32 %add
 }
 
+; Z + ((X >> C) - Y) --> (Z - Y) + (X >> C)
 define i32 @commute_subop0_cadd(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_cadd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #3
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w2, w8
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, lsl #3
 ; CHECK-NEXT:    ret
   %shl = shl i32 %x, 3
   %sub = sub i32 %shl, %y
@@ -720,14 +720,29 @@ define i32 @commute_subop0_cadd(i32 %x, i32 %y, i32 %z) {
   ret i32 %add
 }
 
+; Y + ((X >> C) - X) --> (Y - X) + (X >> C)
 define i32 @commute_subop0_mul(i32 %x, i32 %y) {
 ; CHECK-LABEL: commute_subop0_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #3
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    add w0, w8, w1
+; CHECK-NEXT:    sub w8, w1, w0
+; CHECK-NEXT:    add w0, w8, w0, lsl #3
 ; CHECK-NEXT:    ret
   %mul = mul i32 %x, 7
   %add = add i32 %mul, %y
   ret i32 %add
 }
+
+; negative case for ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
+; Z can't be constant to avoid dead loop
+define i32 @commute_subop0_zconst(i32 %x, i32 %y) {
+; CHECK-LABEL: commute_subop0_zconst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w8, w0, #3
+; CHECK-NEXT:    sub w8, w8, w1
+; CHECK-NEXT:    add w0, w8, #1
+; CHECK-NEXT:    ret
+  %shl = shl i32 %x, 3
+  %sub = sub i32 %shl, %y
+  %add = add i32 %sub, 1
+  ret i32 %add
+}