[llvm] 4a64024 - [AArch64] fold subs ugt/ult to ands when the second operand is a mask

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 16 20:01:59 PST 2023


Author: chenglin.bi
Date: 2023-01-17T12:01:53+08:00
New Revision: 4a64024c1410692197e4b54e27e7b269a67c78f4

URL: https://github.com/llvm/llvm-project/commit/4a64024c1410692197e4b54e27e7b269a67c78f4
DIFF: https://github.com/llvm/llvm-project/commit/4a64024c1410692197e4b54e27e7b269a67c78f4.diff

LOG: [AArch64] fold subs ugt/ult to ands when the second operand is a mask

https://alive2.llvm.org/ce/z/pLhHI9

Fix: https://github.com/llvm/llvm-project/issues/59598

Reviewed By: samtebbs

Differential Revision: https://reviews.llvm.org/D141829

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/andcompare.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a7c0d7b1996c..c6bf71cc9d7a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19813,6 +19813,49 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width,
   return false;
 }
 
+// (X & C) >u Mask --> ((X & (C & ~Mask)) != 0), where Mask is a low-bit mask
+// (X & C) <u Mask --> ((X & (C & ~Mask)) == 0) -- NOTE(review): the <u form
+static SDValue performSubsToAndsCombine(SDNode *N, SDNode *SubsNode,
+                                        SDNode *AndNode, SelectionDAG &DAG,
+                                        unsigned CCIndex, unsigned CmpIndex,
+                                        unsigned CC) {
+  if (CC != AArch64CC::HI && CC != AArch64CC::LO)
+    return SDValue();
+
+  ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(AndNode->getOperand(1));
+  if (!AndC)
+    return SDValue();
+
+  ConstantSDNode *SubsC = dyn_cast<ConstantSDNode>(SubsNode->getOperand(1));
+  if (!SubsC)
+    return SDValue();
+
+  APInt SubsAP = SubsC->getAPIntValue();
+  if (!SubsAP.isMask())
+    return SDValue();
+
+  SDLoc DL(N);
+  APInt AndSMask = (~SubsAP) & AndC->getAPIntValue();
+  SDValue ANDS = DAG.getNode(
+      AArch64ISD::ANDS, DL, SubsNode->getVTList(), AndNode->getOperand(0),
+      DAG.getConstant(AndSMask, DL, SubsC->getValueType(0)));
+  SDValue AArch64_CC =
+      DAG.getConstant(CC == AArch64CC::HI ? AArch64CC::NE : AArch64CC::EQ, DL,
+                      N->getOperand(CCIndex)->getValueType(0));
+
+  // For now, only performCSELCombine and performBRCONDCombine call this
+  // function. Both pass 2 for CCIndex and 3 for CmpIndex, with 4 operands,
+  // so just initialize the ops directly to simplify the code. If some other
+  // caller ever passes a different CCIndex/CmpIndex, rewrite this with a
+  // loop over the operands.
+  // TODO: Do we need to assert number of operand is 4 here?
+  assert((CCIndex == 2 && CmpIndex == 3) &&
+         "Expected CCIndex to be 2 and CmpIndex to be 3.");
+  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), AArch64_CC,
+                   ANDS.getValue(1)};
+  return DAG.getNode(N->getOpcode(), N, N->getVTList(), Ops);
+}
+
 static
 SDValue performCONDCombine(SDNode *N,
                            TargetLowering::DAGCombinerInfo &DCI,
@@ -19834,6 +19877,10 @@ SDValue performCONDCombine(SDNode *N,
   if (AndNode->getOpcode() != ISD::AND)
     return SDValue();
 
+  if (SDValue Val = performSubsToAndsCombine(N, SubsNode, AndNode, DAG, CCIndex,
+                                             CmpIndex, CC))
+    return Val;
+
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
     uint32_t CNV = CN->getZExtValue();
     if (CNV == 255)

diff --git a/llvm/test/CodeGen/AArch64/andcompare.ll b/llvm/test/CodeGen/AArch64/andcompare.ll
index 40924d4dc7c0..4897be89e284 100644
--- a/llvm/test/CodeGen/AArch64/andcompare.ll
+++ b/llvm/test/CodeGen/AArch64/andcompare.ll
@@ -2401,5 +2401,86 @@ entry:
   %z = zext i1 %a to i32
   ret i32 %z
 }
+
+define i32 @cmp_to_ands1(i32 %num) {
+; SDISEL-LABEL: cmp_to_ands1:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    and w8, w0, #0xff
+; SDISEL-NEXT:    tst w0, #0xfe
+; SDISEL-NEXT:    csel w0, w8, wzr, ne
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_to_ands1:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0xff
+; GISEL-NEXT:    cmp w8, #1
+; GISEL-NEXT:    csel w0, w8, wzr, hi
+; GISEL-NEXT:    ret
+  %and = and i32 %num, 255
+  %cmp = icmp ugt i32 %and, 1
+  %r = select i1 %cmp, i32 %and, i32 0
+  ret i32 %r
+}
+
+define i32 @cmp_to_ands2(i32 %num) {
+; SDISEL-LABEL: cmp_to_ands2:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    and w8, w0, #0xfe
+; SDISEL-NEXT:    tst w0, #0xc0
+; SDISEL-NEXT:    csel w0, w8, wzr, ne
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_to_ands2:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0xfe
+; GISEL-NEXT:    cmp w8, #63
+; GISEL-NEXT:    csel w0, w8, wzr, hi
+; GISEL-NEXT:    ret
+  %and = and i32 %num, 254
+  %cmp = icmp ugt i32 %and, 63
+  %r = select i1 %cmp, i32 %and, i32 0
+  ret i32 %r
+}
+
+define i32 @cmp_to_ands3(i32 %num, i32 %a) {
+; SDISEL-LABEL: cmp_to_ands3:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    tst w0, #0x10
+; SDISEL-NEXT:    csel w0, w1, wzr, ne
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_to_ands3:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #23
+; GISEL-NEXT:    and w8, w0, w8
+; GISEL-NEXT:    cmp w8, #7
+; GISEL-NEXT:    csel w0, w1, wzr, hi
+; GISEL-NEXT:    ret
+  %and = and i32 %num, 23
+  %cmp = icmp ugt i32 %and, 7
+  %r = select i1 %cmp, i32 %a, i32 0
+  ret i32 %r
+}
+
+define i32 @cmp_to_ands4(i32 %num, i32 %a) {
+; SDISEL-LABEL: cmp_to_ands4:
+; SDISEL:       // %bb.0:
+; SDISEL-NEXT:    and w8, w0, #0x30
+; SDISEL-NEXT:    tst w0, #0x20
+; SDISEL-NEXT:    csel w0, w8, w1, eq
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: cmp_to_ands4:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    and w8, w0, #0x30
+; GISEL-NEXT:    cmp w8, #31
+; GISEL-NEXT:    csel w0, w8, w1, lo
+; GISEL-NEXT:    ret
+  %and = and i32 %num, 48
+  %cmp = icmp ult i32 %and, 31
+  %r = select i1 %cmp, i32 %and, i32 %a
+  ret i32 %r
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK: {{.*}}


        


More information about the llvm-commits mailing list