[llvm] 45299fb - Reapply [AArch64] fold subs ugt/ult to ands when the second operand is mask/pow2

Wed Jan 18 03:24:34 PST 2023

Author: chenglin.bi
Date: 2023-01-18T19:24:20+08:00
New Revision: 45299fb0f99cd94aeb33d954c4d68123a0bc7e9e

URL: https://github.com/llvm/llvm-project/commit/45299fb0f99cd94aeb33d954c4d68123a0bc7e9e
DIFF: https://github.com/llvm/llvm-project/commit/45299fb0f99cd94aeb33d954c4d68123a0bc7e9e.diff

LOG: Reapply [AArch64] fold subs ugt/ult to ands when the second operand is mask/pow2

The original patch made a mistake: the reverse condition code of ugt should be ule.
And `ule C` is generalized to `ult C + 1`, so the new patch adds support for the `ult Pow2` case.

https://alive2.llvm.org/ce/z/naBw5A

Reviewed By: samtebbs, chapuni

Differential Revision: https://reviews.llvm.org/D141829

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/and-mask-removal.ll
    llvm/test/CodeGen/AArch64/andcompare.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fe48360b5cf1f..9305a65b5a0ad 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19806,6 +19806,54 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width,
   return false;
 }
 
+// (X & C) >u Mask --> (X & (C & (~Mask)) != 0
+// (X & C) <u Pow2 --> (X & (C & ~(Pow2-1)) == 0
+static SDValue performSubsToAndsCombine(SDNode *N, SDNode *SubsNode,
+                                        SDNode *AndNode, SelectionDAG &DAG,
+                                        unsigned CCIndex, unsigned CmpIndex,
+                                        unsigned CC) {
+  ConstantSDNode *SubsC = dyn_cast<ConstantSDNode>(SubsNode->getOperand(1));
+  if (!SubsC)
+    return SDValue();
+
+  APInt SubsAP = SubsC->getAPIntValue();
+  if (CC == AArch64CC::HI) {
+    if (!SubsAP.isMask())
+      return SDValue();
+  } else if (CC == AArch64CC::LO) {
+    if (!SubsAP.isPowerOf2())
+      return SDValue();
+  } else
+    return SDValue();
+
+  ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(AndNode->getOperand(1));
+  if (!AndC)
+    return SDValue();
+
+  APInt MaskAP = CC == AArch64CC::HI ? SubsAP : (SubsAP - 1);
+
+  SDLoc DL(N);
+  APInt AndSMask = (~MaskAP) & AndC->getAPIntValue();
+  SDValue ANDS = DAG.getNode(
+      AArch64ISD::ANDS, DL, SubsNode->getVTList(), AndNode->getOperand(0),
+      DAG.getConstant(AndSMask, DL, SubsC->getValueType(0)));
+  SDValue AArch64_CC =
+      DAG.getConstant(CC == AArch64CC::HI ? AArch64CC::NE : AArch64CC::EQ, DL,
+                      N->getOperand(CCIndex)->getValueType(0));
+
+  // For now, only performCSELCombine and performBRCONDCombine call this
+  // function. And both of them pass 2 for CCIndex, 3 for CmpIndex with 4
+  // operands. So just init the ops direct to simplify the code. If we have some
+  // other case with different CCIndex, CmpIndex, we need to use for loop to
+  // rewrite the code here.
+  // TODO: Do we need to assert number of operand is 4 here?
+  assert((CCIndex == 2 && CmpIndex == 3) &&
+         "Expected CCIndex to be 2 and CmpIndex to be 3.");
+  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), AArch64_CC,
+                   ANDS.getValue(1)};
+  return DAG.getNode(N->getOpcode(), N, N->getVTList(), Ops);
+}
+
 static
 SDValue performCONDCombine(SDNode *N,
                            TargetLowering::DAGCombinerInfo &DCI,
@@ -19827,6 +19875,10 @@ SDValue performCONDCombine(SDNode *N,
   if (AndNode->getOpcode() != ISD::AND)
     return SDValue();
 
+  if (SDValue Val = performSubsToAndsCombine(N, SubsNode, AndNode, DAG, CCIndex,
+                                             CmpIndex, CC))
+    return Val;
+
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
     uint32_t CNV = CN->getZExtValue();
     if (CNV == 255)

diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
index f3307144e08df..b86c600e41acf 100644
--- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll
+++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
@@ -510,8 +510,8 @@ define i64 @pr58109b(i8 signext %0, i64 %a, i64 %b) {
 ; CHECK-SD-LABEL: pr58109b:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    add w8, w0, #1
-; CHECK-SD-NEXT:    cmp w8, #2
-; CHECK-SD-NEXT:    csel x0, x1, x2, lo
+; CHECK-SD-NEXT:    tst w8, #0xfe
+; CHECK-SD-NEXT:    csel x0, x1, x2, eq
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: pr58109b:

diff --git a/llvm/test/CodeGen/AArch64/andcompare.ll b/llvm/test/CodeGen/AArch64/andcompare.ll
index 40924d4dc7c06..8f0953b398c4c 100644
--- a/llvm/test/CodeGen/AArch64/andcompare.ll
+++ b/llvm/test/CodeGen/AArch64/andcompare.ll
@@ -2401,5 +2401,127 @@ entry:
   %z = zext i1 %a to i32
   ret i32 %z
 }
+
+define i32 @cmp_to_ands1(i32 %num) {
+; SDISEL-LABEL: cmp_to_ands1:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    and w8, w0, #0xff
+; SDISEL-NEXT:    tst w0, #0xfe
+; SDISEL-NEXT:    csel w0, w8, wzr, ne
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_to_ands1:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0xff
+; GISEL-NEXT:    cmp w8, #1
+; GISEL-NEXT:    csel w0, w8, wzr, hi
+; GISEL-NEXT:    ret
+  %and = and i32 %num, 255
+  %cmp = icmp ugt i32 %and, 1
+  %r = select i1 %cmp, i32 %and, i32 0
+  ret i32 %r
+}
+
+define i32 @cmp_to_ands2(i32 %num) {
+; SDISEL-LABEL: cmp_to_ands2:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    and w8, w0, #0xfe
+; SDISEL-NEXT:    tst w0, #0xc0
+; SDISEL-NEXT:    csel w0, w8, wzr, ne
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_to_ands2:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0xfe
+; GISEL-NEXT:    cmp w8, #63
+; GISEL-NEXT:    csel w0, w8, wzr, hi
+; GISEL-NEXT:    ret
+  %and = and i32 %num, 254
+  %cmp = icmp ugt i32 %and, 63
+  %r = select i1 %cmp, i32 %and, i32 0
+  ret i32 %r
+}
+
+define i32 @cmp_to_ands3(i32 %num, i32 %a) {
+; SDISEL-LABEL: cmp_to_ands3:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    tst w0, #0x10
+; SDISEL-NEXT:    csel w0, w1, wzr, ne
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_to_ands3:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #23
+; GISEL-NEXT:    and w8, w0, w8
+; GISEL-NEXT:    cmp w8, #7
+; GISEL-NEXT:    csel w0, w1, wzr, hi
+; GISEL-NEXT:    ret
+  %and = and i32 %num, 23
+  %cmp = icmp ugt i32 %and, 7
+  %r = select i1 %cmp, i32 %a, i32 0
+  ret i32 %r
+}
+
+define i32 @cmp_to_ands4(i32 %num, i32 %a) {
+; SDISEL-LABEL: cmp_to_ands4:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    and w8, w0, #0x30
+; SDISEL-NEXT:    tst w0, #0x20
+; SDISEL-NEXT:    csel w0, w8, w1, eq
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_to_ands4:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0x30
+; GISEL-NEXT:    cmp w8, #31
+; GISEL-NEXT:    csel w0, w8, w1, ls
+; GISEL-NEXT:    ret
+  %and = and i32 %num, 48
+  %cmp = icmp ule i32 %and, 31
+  %r = select i1 %cmp, i32 %and, i32 %a
+  ret i32 %r
+}
+
+define i32 @cmp_to_ands5(i32 %num, i32 %a) {
+; SDISEL-LABEL: cmp_to_ands5:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    and w8, w0, #0xf8
+; SDISEL-NEXT:    tst w0, #0xc0
+; SDISEL-NEXT:    csel w0, w8, w1, eq
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_to_ands5:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0xf8
+; GISEL-NEXT:    cmp w8, #64
+; GISEL-NEXT:    csel w0, w8, w1, lo
+; GISEL-NEXT:    ret
+  %and = and i32 %num, 248
+  %cmp = icmp ult i32 %and, 64
+  %r = select i1 %cmp, i32 %and, i32 %a
+  ret i32 %r
+}
+
+define i32 @cmp_to_ands6(i32 %num) {
+; SDISEL-LABEL: cmp_to_ands6:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    and w8, w0, #0xfe
+; SDISEL-NEXT:    tst w0, #0xf0
+; SDISEL-NEXT:    csel w0, w8, wzr, ne
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_to_ands6:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0xfe
+; GISEL-NEXT:    cmp w8, #16
+; GISEL-NEXT:    csel w0, w8, wzr, hs
+; GISEL-NEXT:    ret
+  %and = and i32 %num, 254
+  %cmp = icmp uge i32 %and, 16
+  %r = select i1 %cmp, i32 %and, i32 0
+  ret i32 %r
+}
+
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK: {{.*}}