[llvm] 45299fb - Reapply [AArch64] fold subs ugt/ult to ands when the second operand is mask/pow2
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 18 03:24:34 PST 2023
Author: chenglin.bi
Date: 2023-01-18T19:24:20+08:00
New Revision: 45299fb0f99cd94aeb33d954c4d68123a0bc7e9e
URL: https://github.com/llvm/llvm-project/commit/45299fb0f99cd94aeb33d954c4d68123a0bc7e9e
DIFF: https://github.com/llvm/llvm-project/commit/45299fb0f99cd94aeb33d954c4d68123a0bc7e9e.diff
LOG: Reapply [AArch64] fold subs ugt/ult to ands when the second operand is mask/pow2
The original patch made a mistake: the reverse condition code of ugt should be
ule, not ult. And x ule C is canonicalized to x ult C + 1, so the new patch adds
support for the ult Pow2 case.
https://alive2.llvm.org/ce/z/naBw5A
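For illustration, here is a minimal standalone C++ check of the two folds on
sample constants (C = 0xff, Mask = 0x3f, Pow2 = 0x40 are arbitrary values
chosen for this sketch, not taken from the patch; the alive2 link above is the
general proof):

  // Exhaustively verify both equivalences over the low 16 bits of X.
  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t C = 0xff;    // the AND immediate
    const uint32_t Mask = 0x3f; // ugt case: second subs operand is a mask
    const uint32_t Pow2 = 0x40; // ult case: second subs operand is a power of 2
    for (uint32_t X = 0; X <= 0xffff; ++X) {
      // (X & C) >u Mask  <=>  (X & (C & ~Mask)) != 0
      assert(((X & C) > Mask) == ((X & (C & ~Mask)) != 0));
      // (X & C) <u Pow2  <=>  (X & (C & ~(Pow2 - 1))) == 0
      assert(((X & C) < Pow2) == ((X & (C & ~(Pow2 - 1))) == 0));
    }
  }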
Reviewed By: samtebbs, chapuni
Differential Revision: https://reviews.llvm.org/D141829
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/and-mask-removal.ll
llvm/test/CodeGen/AArch64/andcompare.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fe48360b5cf1f..9305a65b5a0ad 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19806,6 +19806,54 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width,
return false;
}
+// ((X & C) >u Mask) --> ((X & (C & ~Mask)) != 0)
+// ((X & C) <u Pow2) --> ((X & (C & ~(Pow2 - 1))) == 0)
+static SDValue performSubsToAndsCombine(SDNode *N, SDNode *SubsNode,
+ SDNode *AndNode, SelectionDAG &DAG,
+ unsigned CCIndex, unsigned CmpIndex,
+ unsigned CC) {
+ ConstantSDNode *SubsC = dyn_cast<ConstantSDNode>(SubsNode->getOperand(1));
+ if (!SubsC)
+ return SDValue();
+
+ APInt SubsAP = SubsC->getAPIntValue();
+ if (CC == AArch64CC::HI) {
+ if (!SubsAP.isMask())
+ return SDValue();
+ } else if (CC == AArch64CC::LO) {
+ if (!SubsAP.isPowerOf2())
+ return SDValue();
+ } else
+ return SDValue();
+
+ ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(AndNode->getOperand(1));
+ if (!AndC)
+ return SDValue();
+
+ APInt MaskAP = CC == AArch64CC::HI ? SubsAP : (SubsAP - 1);
+
+ SDLoc DL(N);
+ APInt AndSMask = (~MaskAP) & AndC->getAPIntValue();
+ SDValue ANDS = DAG.getNode(
+ AArch64ISD::ANDS, DL, SubsNode->getVTList(), AndNode->getOperand(0),
+ DAG.getConstant(AndSMask, DL, SubsC->getValueType(0)));
+ SDValue AArch64_CC =
+ DAG.getConstant(CC == AArch64CC::HI ? AArch64CC::NE : AArch64CC::EQ, DL,
+ N->getOperand(CCIndex)->getValueType(0));
+
+ // For now, only performCSELCombine and performBRCONDCombine call this
+ // function. Both of them pass 2 for CCIndex and 3 for CmpIndex with 4
+ // operands, so just initialize the ops directly to simplify the code. If we
+ // get some other case with a different CCIndex or CmpIndex, we need to use a
+ // loop to rewrite the code here.
+ // TODO: Do we need to assert that the number of operands is 4 here?
+ assert((CCIndex == 2 && CmpIndex == 3) &&
+ "Expected CCIndex to be 2 and CmpIndex to be 3.");
+ SDValue Ops[] = {N->getOperand(0), N->getOperand(1), AArch64_CC,
+ ANDS.getValue(1)};
+ return DAG.getNode(N->getOpcode(), N, N->getVTList(), Ops);
+}
+
static
SDValue performCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
@@ -19827,6 +19875,10 @@ SDValue performCONDCombine(SDNode *N,
if (AndNode->getOpcode() != ISD::AND)
return SDValue();
+ if (SDValue Val = performSubsToAndsCombine(N, SubsNode, AndNode, DAG, CCIndex,
+ CmpIndex, CC))
+ return Val;
+
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
uint32_t CNV = CN->getZExtValue();
if (CNV == 255)
diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
index f3307144e08df..b86c600e41acf 100644
--- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll
+++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
@@ -510,8 +510,8 @@ define i64 @pr58109b(i8 signext %0, i64 %a, i64 %b) {
; CHECK-SD-LABEL: pr58109b:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: add w8, w0, #1
-; CHECK-SD-NEXT: cmp w8, #2
-; CHECK-SD-NEXT: csel x0, x1, x2, lo
+; CHECK-SD-NEXT: tst w8, #0xfe
+; CHECK-SD-NEXT: csel x0, x1, x2, eq
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: pr58109b:
diff --git a/llvm/test/CodeGen/AArch64/andcompare.ll b/llvm/test/CodeGen/AArch64/andcompare.ll
index 40924d4dc7c06..8f0953b398c4c 100644
--- a/llvm/test/CodeGen/AArch64/andcompare.ll
+++ b/llvm/test/CodeGen/AArch64/andcompare.ll
@@ -2401,5 +2401,127 @@ entry:
%z = zext i1 %a to i32
ret i32 %z
}
+
+define i32 @cmp_to_ands1(i32 %num) {
+; SDISEL-LABEL: cmp_to_ands1:
+; SDISEL: // %bb.0:
+; SDISEL-NEXT: and w8, w0, #0xff
+; SDISEL-NEXT: tst w0, #0xfe
+; SDISEL-NEXT: csel w0, w8, wzr, ne
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: cmp_to_ands1:
+; GISEL: // %bb.0:
+; GISEL-NEXT: and w8, w0, #0xff
+; GISEL-NEXT: cmp w8, #1
+; GISEL-NEXT: csel w0, w8, wzr, hi
+; GISEL-NEXT: ret
+ %and = and i32 %num, 255
+ %cmp = icmp ugt i32 %and, 1
+ %r = select i1 %cmp, i32 %and, i32 0
+ ret i32 %r
+}
+
+define i32 @cmp_to_ands2(i32 %num) {
+; SDISEL-LABEL: cmp_to_ands2:
+; SDISEL: // %bb.0:
+; SDISEL-NEXT: and w8, w0, #0xfe
+; SDISEL-NEXT: tst w0, #0xc0
+; SDISEL-NEXT: csel w0, w8, wzr, ne
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: cmp_to_ands2:
+; GISEL: // %bb.0:
+; GISEL-NEXT: and w8, w0, #0xfe
+; GISEL-NEXT: cmp w8, #63
+; GISEL-NEXT: csel w0, w8, wzr, hi
+; GISEL-NEXT: ret
+ %and = and i32 %num, 254
+ %cmp = icmp ugt i32 %and, 63
+ %r = select i1 %cmp, i32 %and, i32 0
+ ret i32 %r
+}
+
+define i32 @cmp_to_ands3(i32 %num, i32 %a) {
+; SDISEL-LABEL: cmp_to_ands3:
+; SDISEL: // %bb.0:
+; SDISEL-NEXT: tst w0, #0x10
+; SDISEL-NEXT: csel w0, w1, wzr, ne
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: cmp_to_ands3:
+; GISEL: // %bb.0:
+; GISEL-NEXT: mov w8, #23
+; GISEL-NEXT: and w8, w0, w8
+; GISEL-NEXT: cmp w8, #7
+; GISEL-NEXT: csel w0, w1, wzr, hi
+; GISEL-NEXT: ret
+ %and = and i32 %num, 23
+ %cmp = icmp ugt i32 %and, 7
+ %r = select i1 %cmp, i32 %a, i32 0
+ ret i32 %r
+}
+
+define i32 @cmp_to_ands4(i32 %num, i32 %a) {
+; SDISEL-LABEL: cmp_to_ands4:
+; SDISEL: // %bb.0:
+; SDISEL-NEXT: and w8, w0, #0x30
+; SDISEL-NEXT: tst w0, #0x20
+; SDISEL-NEXT: csel w0, w8, w1, eq
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: cmp_to_ands4:
+; GISEL: // %bb.0:
+; GISEL-NEXT: and w8, w0, #0x30
+; GISEL-NEXT: cmp w8, #31
+; GISEL-NEXT: csel w0, w8, w1, ls
+; GISEL-NEXT: ret
+ %and = and i32 %num, 48
+ %cmp = icmp ule i32 %and, 31
+ %r = select i1 %cmp, i32 %and, i32 %a
+ ret i32 %r
+}
+
+define i32 @cmp_to_ands5(i32 %num, i32 %a) {
+; SDISEL-LABEL: cmp_to_ands5:
+; SDISEL: // %bb.0:
+; SDISEL-NEXT: and w8, w0, #0xf8
+; SDISEL-NEXT: tst w0, #0xc0
+; SDISEL-NEXT: csel w0, w8, w1, eq
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: cmp_to_ands5:
+; GISEL: // %bb.0:
+; GISEL-NEXT: and w8, w0, #0xf8
+; GISEL-NEXT: cmp w8, #64
+; GISEL-NEXT: csel w0, w8, w1, lo
+; GISEL-NEXT: ret
+ %and = and i32 %num, 248
+ %cmp = icmp ult i32 %and, 64
+ %r = select i1 %cmp, i32 %and, i32 %a
+ ret i32 %r
+}
+
+define i32 @cmp_to_ands6(i32 %num) {
+; SDISEL-LABEL: cmp_to_ands6:
+; SDISEL: // %bb.0:
+; SDISEL-NEXT: and w8, w0, #0xfe
+; SDISEL-NEXT: tst w0, #0xf0
+; SDISEL-NEXT: csel w0, w8, wzr, ne
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: cmp_to_ands6:
+; GISEL: // %bb.0:
+; GISEL-NEXT: and w8, w0, #0xfe
+; GISEL-NEXT: cmp w8, #16
+; GISEL-NEXT: csel w0, w8, wzr, hs
+; GISEL-NEXT: ret
+ %and = and i32 %num, 254
+ %cmp = icmp uge i32 %and, 16
+ %r = select i1 %cmp, i32 %and, i32 0
+ ret i32 %r
+}
+
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
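To see how the new tst immediates in the tests above fall out of the combine,
here is a small sketch of the AndSMask computation for cmp_to_ands2 (the
helper name andsImm is made up for this example; it mirrors the HI/ugt path of
performSubsToAndsCombine, where MaskAP is the subs constant itself):

  #include <cstdint>
  #include <cstdio>

  // HI (ugt) case: the new ANDS/tst immediate is AndC & ~MaskAP.
  static uint32_t andsImm(uint32_t AndC, uint32_t MaskAP) {
    return AndC & ~MaskAP;
  }

  int main() {
    // and w8, w0, #0xfe; cmp w8, #63; ...hi  ==>  tst w0, #0xc0; ...ne
    printf("%#x\n", andsImm(0xfe, 0x3f)); // prints 0xc0
  }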