[llvm] a362312 - [AArch64] Support all extend/shift op for pattern: (ExtendOrShiftNode - Y) + Z --> (Z - Y) + ExtendOrShiftNode
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 9 02:06:42 PST 2022
Author: chenglin.bi
Date: 2022-11-09T18:06:35+08:00
New Revision: a3623128969551d10140bf5cc19f33fcc4ca0573
URL: https://github.com/llvm/llvm-project/commit/a3623128969551d10140bf5cc19f33fcc4ca0573
DIFF: https://github.com/llvm/llvm-project/commit/a3623128969551d10140bf5cc19f33fcc4ca0573.diff
LOG: [AArch64] Support all extend/shift op for pattern: (ExtendOrShiftNode - Y) + Z --> (Z - Y) + ExtendOrShiftNode
Followup rG325a3083b5b24
Add SRL/SRA/xxx_EXTEND/AND support
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D137069
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/addsub.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fce26dd6c21f..f58b7c4c3b7c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16906,34 +16906,51 @@ static SDValue performBuildVectorCombine(SDNode *N,
return SDValue();
}
-// ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
-static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z,
- SelectionDAG &DAG) {
- auto IsOneUseShiftC = [&](SDValue Shift) {
- if (!Shift.hasOneUse())
- return false;
+// Check whether a node is an extend or shift operand
+static bool isExtendOrShiftOperand(SDValue N) {
+ unsigned Opcode = N.getOpcode();
+ if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_INREG ||
+ Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ANY_EXTEND) {
+ EVT SrcVT;
+ if (Opcode == ISD::SIGN_EXTEND_INREG)
+ SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
+ else
+ SrcVT = N.getOperand(0).getValueType();
- // TODO: support SRL and SRA also
- if (Shift.getOpcode() != ISD::SHL)
+ return SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8;
+ } else if (Opcode == ISD::AND) {
+ ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!CSD)
return false;
+ uint64_t AndMask = CSD->getZExtValue();
+ return AndMask == 0xff || AndMask == 0xffff || AndMask == 0xffffffff;
+ } else if (Opcode == ISD::SHL || Opcode == ISD::SRL || Opcode == ISD::SRA) {
+ return isa<ConstantSDNode>(N.getOperand(1));
+ }
- if (!isa<ConstantSDNode>(Shift.getOperand(1)))
- return false;
- return true;
+ return false;
+}
+
+// (N - Y) + Z --> (Z - Y) + N
+// when N is an extend or shift operand
+static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z,
+ SelectionDAG &DAG) {
+ auto IsOneUseExtend = [](SDValue N) {
+ return N.hasOneUse() && isExtendOrShiftOperand(N);
};
// DAGCombiner will revert the combination when Z is constant, causing an
// infinite loop. So don't enable the combination when Z is constant.
// If Z is a one-use extend/shift, we also can't do the optimization;
// it would fall into an infinite loop as well.
- if (isa<ConstantSDNode>(Z) || IsOneUseShiftC(Z))
+ if (isa<ConstantSDNode>(Z) || IsOneUseExtend(Z))
return SDValue();
if (SUB.getOpcode() != ISD::SUB || !SUB.hasOneUse())
return SDValue();
SDValue Shift = SUB.getOperand(0);
- if (!IsOneUseShiftC(Shift))
+ if (!IsOneUseExtend(Shift))
return SDValue();
SDLoc DL(N);
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index 4210cc5193bc..3020576e0bbf 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -711,9 +711,8 @@ define i32 @commute_subop0(i32 %x, i32 %y, i32 %z) {
define i32 @commute_subop0_lshr(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: commute_subop0_lshr:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsr w8, w0, #3
-; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: add w0, w8, w2
+; CHECK-NEXT: sub w8, w2, w1
+; CHECK-NEXT: add w0, w8, w0, lsr #3
; CHECK-NEXT: ret
%lshr = lshr i32 %x, 3
%sub = sub i32 %lshr, %y
@@ -725,9 +724,8 @@ define i32 @commute_subop0_lshr(i32 %x, i32 %y, i32 %z) {
define i32 @commute_subop0_ashr(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: commute_subop0_ashr:
; CHECK: // %bb.0:
-; CHECK-NEXT: asr w8, w0, #3
-; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: add w0, w8, w2
+; CHECK-NEXT: sub w8, w2, w1
+; CHECK-NEXT: add w0, w8, w0, asr #3
; CHECK-NEXT: ret
%ashr = ashr i32 %x, 3
%sub = sub i32 %ashr, %y
@@ -739,10 +737,8 @@ define i32 @commute_subop0_ashr(i32 %x, i32 %y, i32 %z) {
define i64 @commute_subop0_sext(i32 %x, i64 %y, i64 %z) {
; CHECK-LABEL: commute_subop0_sext:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: sub x8, x8, x1
-; CHECK-NEXT: add x0, x8, x2
+; CHECK-NEXT: sub x8, x2, x1
+; CHECK-NEXT: add x0, x8, w0, sxtw
; CHECK-NEXT: ret
%sext = sext i32 %x to i64
%sub = sub i64 %sext, %y
@@ -754,9 +750,8 @@ define i64 @commute_subop0_sext(i32 %x, i64 %y, i64 %z) {
define i64 @commute_subop0_sext_inreg(i64 %x, i64 %y, i64 %z) {
; CHECK-LABEL: commute_subop0_sext_inreg:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth x8, w0
-; CHECK-NEXT: sub x8, x8, x1
-; CHECK-NEXT: add x0, x8, x2
+; CHECK-NEXT: sub x8, x2, x1
+; CHECK-NEXT: add x0, x8, w0, sxth
; CHECK-NEXT: ret
%shl = shl i64 %x, 48
%ashr = ashr i64 %shl, 48
@@ -769,9 +764,8 @@ define i64 @commute_subop0_sext_inreg(i64 %x, i64 %y, i64 %z) {
define i32 @commute_subop0_zext(i16 %x, i32 %y, i32 %z) {
; CHECK-LABEL: commute_subop0_zext:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: add w0, w8, w2
+; CHECK-NEXT: sub w8, w2, w1
+; CHECK-NEXT: add w0, w8, w0, uxth
; CHECK-NEXT: ret
%zext = zext i16 %x to i32
%sub = sub i32 %zext, %y
@@ -785,9 +779,8 @@ define i8 @commute_subop0_anyext(i16 %a, i16 %b, i32 %c) {
; CHECK-LABEL: commute_subop0_anyext:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #111
-; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: sub w9, w2, w1
; CHECK-NEXT: madd w8, w0, w8, w9
-; CHECK-NEXT: add w8, w8, w2
; CHECK-NEXT: lsl w8, w8, #3
; CHECK-NEXT: sub w0, w8, #1776
; CHECK-NEXT: ret
@@ -806,9 +799,8 @@ define i8 @commute_subop0_anyext(i16 %a, i16 %b, i32 %c) {
define i32 @commute_subop0_and(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: commute_subop0_and:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: add w0, w8, w2
+; CHECK-NEXT: sub w8, w2, w1
+; CHECK-NEXT: add w0, w8, w0, uxtb
; CHECK-NEXT: ret
%and = and i32 %x, 255
%sub = sub i32 %and, %y
More information about the llvm-commits
mailing list